diff --git a/examples/modelconfig-with-tls.yaml b/examples/modelconfig-with-tls.yaml index 7f4905e1d7..c59ec3b1bc 100644 --- a/examples/modelconfig-with-tls.yaml +++ b/examples/modelconfig-with-tls.yaml @@ -386,5 +386,5 @@ roleRef: # 6. Troubleshooting: # - See https://kagent.dev/docs for detailed debugging steps # - Check agent logs: kubectl logs deployment/agent- -# - Verify Secret is mounted: kubectl exec deployment/agent- -- ls /etc/ssl/certs/custom/ -# - Test certificate: kubectl exec deployment/agent- -- openssl x509 -in /etc/ssl/certs/custom/ca.crt -text -noout +# - Verify Secret is mounted: kubectl exec deployment/agent- -- ls /etc/ssl/certs/custom/corp-ca/ +# - Test certificate: kubectl exec deployment/agent- -- openssl x509 -in /etc/ssl/certs/custom/corp-ca/ca.crt -text -noout diff --git a/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml b/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml index 00b21b6da0..5aa3eeff71 100644 --- a/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml +++ b/go/api/config/crd/bases/kagent.dev_modelconfigs.yaml @@ -652,17 +652,18 @@ spec: properties: caCertSecretKey: description: |- - CACertSecretKey is the key within the Secret that contains the CA certificate data. - This field follows the same pattern as APIKeySecretKey. - Required when CACertSecretRef is set (unless DisableVerify is true). + CACertSecretKey is the key within the Secret that contains the + CA certificate data (PEM-encoded). Required when CACertSecretRef + is set (unless DisableVerify is true). type: string caCertSecretRef: description: |- CACertSecretRef is a reference to a Kubernetes Secret containing CA certificate(s) in PEM format. The Secret must be in the same - namespace as the ModelConfig. - When set, the certificate will be used to verify the provider's SSL certificate. - This field follows the same pattern as APIKeySecret. + namespace as the resource referencing it (ModelConfig, + RemoteMCPServer, or any future consumer of TLSConfig). + When set, the certificate will be used to verify the upstream's + SSL certificate. type: string disableSystemCAs: default: false @@ -682,6 +683,20 @@ spec: Production deployments MUST use proper certificates. type: boolean type: object + x-kubernetes-validations: + - message: caCertSecretKey requires caCertSecretRef + rule: '!(has(self.caCertSecretKey) && size(self.caCertSecretKey) + > 0 && (!has(self.caCertSecretRef) || size(self.caCertSecretRef) + == 0))' + - message: caCertSecretRef requires caCertSecretKey + rule: '!(has(self.caCertSecretRef) && size(self.caCertSecretRef) + > 0 && (!has(self.caCertSecretKey) || size(self.caCertSecretKey) + == 0))' + - message: disableSystemCAs requires caCertSecretRef or disableVerify + (trust-nothing config rejects every upstream) + rule: '!(has(self.disableSystemCAs) && self.disableSystemCAs && + (!has(self.disableVerify) || !self.disableVerify) && (!has(self.caCertSecretRef) + || size(self.caCertSecretRef) == 0))' required: - model type: object @@ -719,22 +734,6 @@ spec: rule: '!(has(self.apiKeyPassthrough) && self.apiKeyPassthrough && (self.provider == ''Gemini'' || self.provider == ''GeminiVertexAI'' || self.provider == ''AnthropicVertexAI''))' - - message: caCertSecretKey requires caCertSecretRef - rule: '!(has(self.tls) && has(self.tls.caCertSecretKey) && size(self.tls.caCertSecretKey) - > 0 && (!has(self.tls.caCertSecretRef) || size(self.tls.caCertSecretRef) - == 0))' - - message: caCertSecretKey requires caCertSecretRef (unless disableVerify - is true) - rule: '!(has(self.tls) && (!has(self.tls.disableVerify) || !self.tls.disableVerify) - && has(self.tls.caCertSecretKey) && size(self.tls.caCertSecretKey) - > 0 && (!has(self.tls.caCertSecretRef) || size(self.tls.caCertSecretRef) - == 0))' - - message: caCertSecretRef requires caCertSecretKey (unless disableVerify - is true) - rule: '!(has(self.tls) && (!has(self.tls.disableVerify) || !self.tls.disableVerify) - && has(self.tls.caCertSecretRef) && size(self.tls.caCertSecretRef) - > 0 && (!has(self.tls.caCertSecretKey) || size(self.tls.caCertSecretKey) - == 0))' - message: openAI.tokenExchange requires apiKeySecret (the service account secret) rule: '!(has(self.openAI) && has(self.openAI.tokenExchange) && (!has(self.apiKeySecret) diff --git a/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml b/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml index f8bbcf0b1d..4ff81906eb 100644 --- a/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml +++ b/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml @@ -179,6 +179,64 @@ spec: timeout: default: 30s type: string + tls: + description: |- + TLS configuration for the upstream MCP server connection. + Use this for HTTPS upstreams that present a certificate the agent's + system trust store does not include (corporate CA, self-signed cert + on a test fixture, internal MCP gateway). Reuses the same TLSConfig + type as ModelConfig.spec.tls, with identical semantics: disableVerify + turns off certificate validation entirely, caCertSecretRef + + caCertSecretKey point at a PEM bundle Secret in the same namespace, + and disableSystemCAs trusts only the named bundle. + properties: + caCertSecretKey: + description: |- + CACertSecretKey is the key within the Secret that contains the + CA certificate data (PEM-encoded). Required when CACertSecretRef + is set (unless DisableVerify is true). + type: string + caCertSecretRef: + description: |- + CACertSecretRef is a reference to a Kubernetes Secret containing + CA certificate(s) in PEM format. The Secret must be in the same + namespace as the resource referencing it (ModelConfig, + RemoteMCPServer, or any future consumer of TLSConfig). + When set, the certificate will be used to verify the upstream's + SSL certificate. + type: string + disableSystemCAs: + default: false + description: |- + DisableSystemCAs disables the use of system CA certificates. + When false (default), system CA certificates are used for verification (safe behavior). + When true, only the custom CA from CACertSecretRef is trusted. + This allows strict security policies where only corporate CAs should be trusted. + type: boolean + disableVerify: + default: false + description: |- + DisableVerify disables SSL certificate verification entirely. + When false (default), SSL certificates are verified. + When true, SSL certificate verification is disabled. + WARNING: This should ONLY be used in development/testing environments. + Production deployments MUST use proper certificates. + type: boolean + type: object + x-kubernetes-validations: + - message: caCertSecretKey requires caCertSecretRef + rule: '!(has(self.caCertSecretKey) && size(self.caCertSecretKey) + > 0 && (!has(self.caCertSecretRef) || size(self.caCertSecretRef) + == 0))' + - message: caCertSecretRef requires caCertSecretKey + rule: '!(has(self.caCertSecretRef) && size(self.caCertSecretRef) + > 0 && (!has(self.caCertSecretKey) || size(self.caCertSecretKey) + == 0))' + - message: disableSystemCAs requires caCertSecretRef or disableVerify + (trust-nothing config rejects every upstream) + rule: '!(has(self.disableSystemCAs) && self.disableSystemCAs && + (!has(self.disableVerify) || !self.disableVerify) && (!has(self.caCertSecretRef) + || size(self.caCertSecretRef) == 0))' url: minLength: 1 type: string @@ -263,6 +321,12 @@ spec: Important: Run "make" to regenerate code after modifying this file format: int64 type: integer + secretHash: + description: |- + SecretHash stores a hash of the TLS Secret referenced by spec.tls so + agents that consume this RemoteMCPServer can detect cert rotation and + roll on the next reconcile. Empty when spec.tls.caCertSecretRef is unset. + type: string type: object type: object served: true diff --git a/go/api/v1alpha2/modelconfig_types.go b/go/api/v1alpha2/modelconfig_types.go index 0d08928681..b7de32aac0 100644 --- a/go/api/v1alpha2/modelconfig_types.go +++ b/go/api/v1alpha2/modelconfig_types.go @@ -274,9 +274,14 @@ type SAPAICoreConfig struct { AuthURL string `json:"authUrl,omitempty"` } -// TLSConfig contains TLS/SSL configuration options for model provider connections. -// This enables agents to connect to internal LiteLLM gateways or other providers -// that use self-signed certificates or custom certificate authorities. +// TLSConfig contains TLS/SSL configuration options for outbound HTTPS +// connections from the agent (model provider, RemoteMCPServer). The +// XValidation rules below apply at admission to every CRD field that +// uses TLSConfig, so callers don't need to re-declare them per spec. +// +// +kubebuilder:validation:XValidation:message="caCertSecretKey requires caCertSecretRef",rule="!(has(self.caCertSecretKey) && size(self.caCertSecretKey) > 0 && (!has(self.caCertSecretRef) || size(self.caCertSecretRef) == 0))" +// +kubebuilder:validation:XValidation:message="caCertSecretRef requires caCertSecretKey",rule="!(has(self.caCertSecretRef) && size(self.caCertSecretRef) > 0 && (!has(self.caCertSecretKey) || size(self.caCertSecretKey) == 0))" +// +kubebuilder:validation:XValidation:message="disableSystemCAs requires caCertSecretRef or disableVerify (trust-nothing config rejects every upstream)",rule="!(has(self.disableSystemCAs) && self.disableSystemCAs && (!has(self.disableVerify) || !self.disableVerify) && (!has(self.caCertSecretRef) || size(self.caCertSecretRef) == 0))" type TLSConfig struct { // DisableVerify disables SSL certificate verification entirely. // When false (default), SSL certificates are verified. @@ -289,15 +294,16 @@ type TLSConfig struct { // CACertSecretRef is a reference to a Kubernetes Secret containing // CA certificate(s) in PEM format. The Secret must be in the same - // namespace as the ModelConfig. - // When set, the certificate will be used to verify the provider's SSL certificate. - // This field follows the same pattern as APIKeySecret. + // namespace as the resource referencing it (ModelConfig, + // RemoteMCPServer, or any future consumer of TLSConfig). + // When set, the certificate will be used to verify the upstream's + // SSL certificate. // +optional CACertSecretRef string `json:"caCertSecretRef,omitempty"` - // CACertSecretKey is the key within the Secret that contains the CA certificate data. - // This field follows the same pattern as APIKeySecretKey. - // Required when CACertSecretRef is set (unless DisableVerify is true). + // CACertSecretKey is the key within the Secret that contains the + // CA certificate data (PEM-encoded). Required when CACertSecretRef + // is set (unless DisableVerify is true). // +optional CACertSecretKey string `json:"caCertSecretKey,omitempty"` @@ -310,6 +316,19 @@ type TLSConfig struct { DisableSystemCAs bool `json:"disableSystemCAs,omitempty"` } +// IsEmpty reports whether the TLSConfig carries any opinion. A nil +// receiver and an all-zero struct are equivalent — both mean "no TLS +// config supplied" and the consumer should fall back to its default +// behavior (typically system trust store, default httpx client). The +// single helper keeps callers from re-listing fields, so adding a new +// field to TLSConfig only requires updating this method. +func (t *TLSConfig) IsEmpty() bool { + if t == nil { + return true + } + return !t.DisableVerify && t.CACertSecretRef == "" && t.CACertSecretKey == "" && !t.DisableSystemCAs +} + // ModelConfigSpec defines the desired state of ModelConfig. // // +kubebuilder:validation:XValidation:message="provider.openAI must be nil if the provider is not OpenAI",rule="!(has(self.openAI) && self.provider != 'OpenAI')" @@ -325,9 +344,6 @@ type TLSConfig struct { // +kubebuilder:validation:XValidation:message="apiKeySecretKey must be set if apiKeySecret is set (except for Bedrock and SAPAICore providers)",rule="!(has(self.apiKeySecret) && !has(self.apiKeySecretKey) && self.provider != 'Bedrock' && self.provider != 'SAPAICore')" // +kubebuilder:validation:XValidation:message="apiKeyPassthrough and apiKeySecret are mutually exclusive",rule="!(has(self.apiKeyPassthrough) && self.apiKeyPassthrough && has(self.apiKeySecret) && size(self.apiKeySecret) > 0)" // +kubebuilder:validation:XValidation:message="apiKeyPassthrough must be false if provider is Gemini;GeminiVertexAI;AnthropicVertexAI",rule="!(has(self.apiKeyPassthrough) && self.apiKeyPassthrough && (self.provider == 'Gemini' || self.provider == 'GeminiVertexAI' || self.provider == 'AnthropicVertexAI'))" -// +kubebuilder:validation:XValidation:message="caCertSecretKey requires caCertSecretRef",rule="!(has(self.tls) && has(self.tls.caCertSecretKey) && size(self.tls.caCertSecretKey) > 0 && (!has(self.tls.caCertSecretRef) || size(self.tls.caCertSecretRef) == 0))" -// +kubebuilder:validation:XValidation:message="caCertSecretKey requires caCertSecretRef (unless disableVerify is true)",rule="!(has(self.tls) && (!has(self.tls.disableVerify) || !self.tls.disableVerify) && has(self.tls.caCertSecretKey) && size(self.tls.caCertSecretKey) > 0 && (!has(self.tls.caCertSecretRef) || size(self.tls.caCertSecretRef) == 0))" -// +kubebuilder:validation:XValidation:message="caCertSecretRef requires caCertSecretKey (unless disableVerify is true)",rule="!(has(self.tls) && (!has(self.tls.disableVerify) || !self.tls.disableVerify) && has(self.tls.caCertSecretRef) && size(self.tls.caCertSecretRef) > 0 && (!has(self.tls.caCertSecretKey) || size(self.tls.caCertSecretKey) == 0))" // +kubebuilder:validation:XValidation:message="openAI.tokenExchange requires apiKeySecret (the service account secret)",rule="!(has(self.openAI) && has(self.openAI.tokenExchange) && (!has(self.apiKeySecret) || size(self.apiKeySecret) == 0))" // +kubebuilder:validation:XValidation:message="openAI.tokenExchange and apiKeyPassthrough are mutually exclusive",rule="!(has(self.openAI) && has(self.openAI.tokenExchange) && has(self.apiKeyPassthrough) && self.apiKeyPassthrough)" // +kubebuilder:validation:XValidation:message="openAI.tokenExchange type GDCHServiceAccount requires openAI.tokenExchange.gdchServiceAccount",rule="!(has(self.openAI) && has(self.openAI.tokenExchange) && self.openAI.tokenExchange.type == 'GDCHServiceAccount' && !has(self.openAI.tokenExchange.gdchServiceAccount))" diff --git a/go/api/v1alpha2/remotemcpserver_types.go b/go/api/v1alpha2/remotemcpserver_types.go index 0d58952eec..aaa15125be 100644 --- a/go/api/v1alpha2/remotemcpserver_types.go +++ b/go/api/v1alpha2/remotemcpserver_types.go @@ -63,6 +63,17 @@ type RemoteMCPServerSpec struct { // See: https://gateway-api.sigs.k8s.io/guides/multiple-ns/#cross-namespace-routing // +optional AllowedNamespaces *AllowedNamespaces `json:"allowedNamespaces,omitempty"` + + // TLS configuration for the upstream MCP server connection. + // Use this for HTTPS upstreams that present a certificate the agent's + // system trust store does not include (corporate CA, self-signed cert + // on a test fixture, internal MCP gateway). Reuses the same TLSConfig + // type as ModelConfig.spec.tls, with identical semantics: disableVerify + // turns off certificate validation entirely, caCertSecretRef + + // caCertSecretKey point at a PEM bundle Secret in the same namespace, + // and disableSystemCAs trusts only the named bundle. + // +optional + TLS *TLSConfig `json:"tls,omitempty"` } var _ sql.Scanner = (*RemoteMCPServerSpec)(nil) @@ -91,6 +102,11 @@ type RemoteMCPServerStatus struct { Conditions []metav1.Condition `json:"conditions,omitempty"` // +optional DiscoveredTools []*MCPTool `json:"discoveredTools,omitempty"` + // SecretHash stores a hash of the TLS Secret referenced by spec.tls so + // agents that consume this RemoteMCPServer can detect cert rotation and + // roll on the next reconcile. Empty when spec.tls.caCertSecretRef is unset. + // +optional + SecretHash string `json:"secretHash,omitempty"` } type MCPTool struct { diff --git a/go/api/v1alpha2/zz_generated.deepcopy.go b/go/api/v1alpha2/zz_generated.deepcopy.go index 136058bced..4cef11d501 100644 --- a/go/api/v1alpha2/zz_generated.deepcopy.go +++ b/go/api/v1alpha2/zz_generated.deepcopy.go @@ -1394,6 +1394,11 @@ func (in *RemoteMCPServerSpec) DeepCopyInto(out *RemoteMCPServerSpec) { *out = new(AllowedNamespaces) (*in).DeepCopyInto(*out) } + if in.TLS != nil { + in, out := &in.TLS, &out.TLS + *out = new(TLSConfig) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteMCPServerSpec. diff --git a/go/core/internal/controller/reconciler/mcp_server_reconciler_test.go b/go/core/internal/controller/reconciler/mcp_server_reconciler_test.go index 9514ca6f2c..b3cb19c7bf 100644 --- a/go/core/internal/controller/reconciler/mcp_server_reconciler_test.go +++ b/go/core/internal/controller/reconciler/mcp_server_reconciler_test.go @@ -116,6 +116,7 @@ func TestReconcileKagentMCPServer_ErrorPropagation(t *testing.T) { types.NamespacedName{Namespace: "test", Name: "default-model"}, []string{}, // No namespace restrictions for tests nil, + nil, ) // Call ReconcileKagentMCPServer diff --git a/go/core/internal/controller/reconciler/reconciler.go b/go/core/internal/controller/reconciler/reconciler.go index 0b2d93e282..0e697d9fcb 100644 --- a/go/core/internal/controller/reconciler/reconciler.go +++ b/go/core/internal/controller/reconciler/reconciler.go @@ -3,6 +3,8 @@ package reconciler import ( "context" "crypto/sha256" + "crypto/tls" + "crypto/x509" "encoding/hex" "errors" "fmt" @@ -16,6 +18,7 @@ import ( reconcilerutils "github.com/kagent-dev/kagent/go/core/internal/controller/reconciler/utils" "github.com/kagent-dev/kagent/go/core/internal/controller/translator" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + pkgtranslator "github.com/kagent-dev/kagent/go/core/pkg/translator" "github.com/kagent-dev/kmcp/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -90,23 +93,30 @@ type kagentReconciler struct { watchedNamespaces []string sandboxBackend sandboxbackend.Backend + + // rmsURLRewriter optionally transforms the URL the controller dials + // when discovering tools on a RemoteMCPServer. Nil means dial + // s.Spec.URL verbatim. See pkgtranslator.RemoteMCPServerURLRewriter. + rmsURLRewriter pkgtranslator.RemoteMCPServerURLRewriter } func NewKagentReconciler( - translator agent_translator.AdkApiTranslator, + adkTranslator agent_translator.AdkApiTranslator, kube client.Client, dbClient database.Client, defaultModelConfig types.NamespacedName, watchedNamespaces []string, sandboxBackend sandboxbackend.Backend, + rmsURLRewriter pkgtranslator.RemoteMCPServerURLRewriter, ) KagentReconciler { return &kagentReconciler{ - adkTranslator: translator, + adkTranslator: adkTranslator, kube: kube, dbClient: dbClient, defaultModelConfig: defaultModelConfig, watchedNamespaces: watchedNamespaces, sandboxBackend: sandboxBackend, + rmsURLRewriter: rmsURLRewriter, } } @@ -427,6 +437,18 @@ func (a *kagentReconciler) ReconcileKagentModelConfig(ctx context.Context, req c if kubeErr := a.kube.Get(ctx, namespacedName, secret); kubeErr != nil { err = multierror.Append(err, fmt.Errorf("failed to get secret %s: %w", modelConfig.Spec.TLS.CACertSecretRef, kubeErr)) } else { + // Surface the misconfiguration on the ModelConfig's Accepted + // condition: mounting an absent key would crash the agent at + // startup with FileNotFoundError without explaining which + // resource owns the bad reference. Translation still proceeds + // using whatever path the spec derived — agents fail loudly + // at startup, which matches the existing "missing-Secret" + // surface above. + if modelConfig.Spec.TLS.CACertSecretKey != "" { + if _, ok := secret.Data[modelConfig.Spec.TLS.CACertSecretKey]; !ok { + err = multierror.Append(err, fmt.Errorf("tls secret %s does not contain key %q", modelConfig.Spec.TLS.CACertSecretRef, modelConfig.Spec.TLS.CACertSecretKey)) + } + } secrets = append(secrets, secretRef{ NamespacedName: namespacedName, Secret: secret, @@ -594,6 +616,15 @@ func (a *kagentReconciler) ReconcileKagentRemoteMCPServer(ctx context.Context, r GroupKind: server.GroupVersionKind().GroupKind().String(), } + // Compute the TLS-Secret hash before tool discovery so the status + // reflects the operator's current spec.tls.caCertSecretRef contents. + // Agents that mount this Secret read the hash from Status.SecretHash + // at translate time so an in-place cert rotation triggers a rollout + // even when the Secret name (and therefore the cert path) didn't + // change. Missing-Secret errors are folded into the registration + // error path so a misconfigured RMS still surfaces a Failed condition. + secretHash, secretErr := a.computeRemoteMCPServerSecretHash(ctx, server) + l.Info("registering remote MCP server", "url", server.Spec.URL, "protocol", server.Spec.Protocol) start := time.Now() tools, err := a.upsertToolServerForRemoteMCPServer(ctx, dbServer, server) @@ -609,12 +640,16 @@ func (a *kagentReconciler) ReconcileKagentRemoteMCPServer(ctx context.Context, r } else { l.Info("successfully registered remote MCP server", "url", server.Spec.URL, "toolCount", len(tools), "duration", time.Since(start)) } + if secretErr != nil { + err = multierror.Append(err, secretErr) + } // update the tool server status as the agents depend on it if err := a.reconcileRemoteMCPServerStatus( ctx, server, tools, + secretHash, err, ); err != nil { return fmt.Errorf("failed to reconcile remote mcp server status %s: %w", req.NamespacedName, err) @@ -623,10 +658,37 @@ func (a *kagentReconciler) ReconcileKagentRemoteMCPServer(ctx context.Context, r return nil } +// computeRemoteMCPServerSecretHash returns a hash over the TLS Secret +// referenced by spec.tls.caCertSecretRef, matching the shape used by +// ModelConfig's secret hash so agents can detect cert rotation. Returns +// the empty string (no error) when no TLS Secret is referenced. Also +// validates that the named key exists in the Secret so the operator +// gets a clear Accepted=false on the RMS rather than a startup crash +// (FileNotFoundError from the Python ADK) on every consuming agent — +// mirrors the equivalent check in ReconcileKagentModelConfig. +func (a *kagentReconciler) computeRemoteMCPServerSecretHash(ctx context.Context, server *v1alpha2.RemoteMCPServer) (string, error) { + tlsSpec := server.Spec.TLS + if tlsSpec == nil || tlsSpec.CACertSecretRef == "" { + return "", nil + } + secret := &corev1.Secret{} + nn := types.NamespacedName{Namespace: server.Namespace, Name: tlsSpec.CACertSecretRef} + if err := a.kube.Get(ctx, nn, secret); err != nil { + return "", fmt.Errorf("failed to get TLS secret %s: %w", tlsSpec.CACertSecretRef, err) + } + if tlsSpec.CACertSecretKey != "" { + if _, ok := secret.Data[tlsSpec.CACertSecretKey]; !ok { + return "", fmt.Errorf("tls secret %s does not contain key %q", tlsSpec.CACertSecretRef, tlsSpec.CACertSecretKey) + } + } + return computeStatusSecretHash([]secretRef{{NamespacedName: nn, Secret: secret}}), nil +} + func (a *kagentReconciler) reconcileRemoteMCPServerStatus( ctx context.Context, server *v1alpha2.RemoteMCPServer, discoveredTools []*v1alpha2.MCPTool, + secretHash string, err error, ) error { var ( @@ -654,12 +716,14 @@ func (a *kagentReconciler) reconcileRemoteMCPServerStatus( // only update if the status has changed to prevent looping the reconciler if !conditionChanged && server.Status.ObservedGeneration == server.Generation && + server.Status.SecretHash == secretHash && reflect.DeepEqual(server.Status.DiscoveredTools, discoveredTools) { return nil } server.Status.ObservedGeneration = server.Generation server.Status.DiscoveredTools = discoveredTools + server.Status.SecretHash = secretHash if err := a.kube.Status().Update(ctx, server); err != nil { return fmt.Errorf("failed to update remote mcp server status: %w", err) @@ -1028,35 +1092,125 @@ func (a *kagentReconciler) createMcpTransport(ctx context.Context, s *v1alpha2.R return nil, err } - httpClient := newHTTPClient(headers, remoteMCPRegistrationTimeout(s)) + // Resolve the dial-time URL. Default is s.Spec.URL; an installed + // rmsURLRewriter may substitute a different dial target. The + // rewriter controls the URL only — tlsConfig below is built from + // s.Spec.TLS regardless of what the rewriter returns, so rewriting + // http:// → https:// to a destination needing different trust + // roots is not supported (s.Spec.TLS would need to already be + // consistent with the new destination). + endpoint := s.Spec.URL + if a.rmsURLRewriter != nil { + rewritten, err := a.rmsURLRewriter.RewriteRemoteMCPServerURL(ctx, s) + if err != nil { + return nil, fmt.Errorf("failed to rewrite RemoteMCPServer URL for %s/%s: %w", s.Namespace, s.Name, err) + } + endpoint = rewritten + } + + tlsConfig, err := a.buildRemoteMCPServerTLSConfig(ctx, s) + if err != nil { + return nil, fmt.Errorf("failed to build TLS config for %s/%s: %w", s.Namespace, s.Name, err) + } + + httpClient := newHTTPClient(headers, remoteMCPRegistrationTimeout(s), tlsConfig) switch s.Spec.Protocol { case v1alpha2.RemoteMCPServerProtocolSse: return &mcp.SSEClientTransport{ - Endpoint: s.Spec.URL, + Endpoint: endpoint, HTTPClient: httpClient, }, nil default: return &mcp.StreamableClientTransport{ - Endpoint: s.Spec.URL, + Endpoint: endpoint, HTTPClient: httpClient, }, nil } } +// buildRemoteMCPServerTLSConfig returns a *tls.Config matching the +// RemoteMCPServer's spec.tls (or nil when no TLS config is present, so +// the http.Client falls back to Go's default transport with the system +// trust store). Mirrors the per-source TLS semantics the agent +// translator emits — disableVerify, custom CA from a Secret, and +// disableSystemCAs trust-only-the-named-bundle — so tool discovery +// trusts the same upstream chain the agent will trust at runtime. +func (a *kagentReconciler) buildRemoteMCPServerTLSConfig(ctx context.Context, s *v1alpha2.RemoteMCPServer) (*tls.Config, error) { + tlsSpec := s.Spec.TLS + if tlsSpec.IsEmpty() { + return nil, nil + } + + cfg := &tls.Config{ + InsecureSkipVerify: tlsSpec.DisableVerify, //nolint:gosec // operator-authored test-fixture escape hatch + } + + if tlsSpec.CACertSecretRef != "" && tlsSpec.CACertSecretKey != "" { + secret := &corev1.Secret{} + if err := a.kube.Get(ctx, types.NamespacedName{Namespace: s.Namespace, Name: tlsSpec.CACertSecretRef}, secret); err != nil { + return nil, fmt.Errorf("failed to read CA secret %s/%s: %w", s.Namespace, tlsSpec.CACertSecretRef, err) + } + pem, ok := secret.Data[tlsSpec.CACertSecretKey] + if !ok || len(pem) == 0 { + return nil, fmt.Errorf("CA secret %s/%s does not contain key %q", s.Namespace, tlsSpec.CACertSecretRef, tlsSpec.CACertSecretKey) + } + + var pool *x509.CertPool + if tlsSpec.DisableSystemCAs { + pool = x509.NewCertPool() + } else { + sys, err := x509.SystemCertPool() + if err != nil { + return nil, fmt.Errorf("failed to load system CA pool: %w", err) + } + pool = sys + } + if !pool.AppendCertsFromPEM(pem) { + return nil, fmt.Errorf("CA secret %s/%s key %q does not contain valid PEM certificates", s.Namespace, tlsSpec.CACertSecretRef, tlsSpec.CACertSecretKey) + } + cfg.RootCAs = pool + } + // Note: the trust-nothing combination (disableSystemCAs=true without + // caCertSecretRef and without disableVerify) is rejected by the CEL + // rule on TLSConfig at admission, so it cannot reach this code path + // in production. + + return cfg, nil +} + // go-sdk does not have a WithHeaders option when initializing transport -// so we need to create a custom HTTP client that adds headers to all requests. -func newHTTPClient(headers map[string]string, timeout time.Duration) *http.Client { +// so we need to create a custom HTTP client that adds headers to all +// requests. When tlsConfig is non-nil it's installed on a cloned +// transport so tool discovery honors RemoteMCPServer.spec.tls. +func newHTTPClient(headers map[string]string, timeout time.Duration, tlsConfig *tls.Config) *http.Client { + var base = http.DefaultTransport + if tlsConfig != nil { + // Clone the default transport to preserve its dial/keepalive + // settings (proxies, dual-stack, HTTP/2) and override only the + // TLS config. If the default transport isn't *http.Transport + // (extremely unusual — only happens if something earlier swapped + // it out), fall back to a fresh transport so TLS still applies. + if t, ok := http.DefaultTransport.(*http.Transport); ok { + clone := t.Clone() + clone.TLSClientConfig = tlsConfig + base = clone + } else { + base = &http.Transport{TLSClientConfig: tlsConfig} + } + } + if len(headers) == 0 { return &http.Client{ - Timeout: timeout, + Timeout: timeout, + Transport: base, } } return &http.Client{ Timeout: timeout, Transport: &headerTransport{ headers: headers, - base: http.DefaultTransport, + base: base, }, } } diff --git a/go/core/internal/controller/reconciler/reconciler_test.go b/go/core/internal/controller/reconciler/reconciler_test.go index d9a4860a93..0d69b455d6 100644 --- a/go/core/internal/controller/reconciler/reconciler_test.go +++ b/go/core/internal/controller/reconciler/reconciler_test.go @@ -2,11 +2,20 @@ package reconciler import ( "context" + "crypto/rand" + "crypto/rsa" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "math/big" + "net/http" "testing" "time" "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/internal/utils" + "github.com/modelcontextprotocol/go-sdk/mcp" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -674,6 +683,91 @@ func TestValidateCrossNamespaceReferences(t *testing.T) { } } +// TestComputeRemoteMCPServerSecretHash pins the controller-side shape of +// the RMS TLS-Secret hash: the empty string when no TLS Secret is +// referenced, a deterministic hex string when one is, and a different +// hex string after the Secret contents rotate. Agents fold this value +// into their config-hash so an in-place cert rotation triggers a +// rollout (the Python ADK loads the cert at startup, so without a +// rollout pods keep the stale trust chain in memory). +func TestComputeRemoteMCPServerSecretHash(t *testing.T) { + scheme := clientgoscheme.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + caV1 := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-ca", Namespace: "ns"}, + Data: map[string][]byte{"ca.crt": []byte("PEM-V1")}, + } + caV2 := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-ca", Namespace: "ns"}, + Data: map[string][]byte{"ca.crt": []byte("PEM-V2")}, + } + + rmsNoTLS := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "x", Namespace: "ns"}, + Spec: v1alpha2.RemoteMCPServerSpec{URL: "https://x/y"}, + } + rmsWithTLS := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "x", Namespace: "ns"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + URL: "https://x/y", + TLS: &v1alpha2.TLSConfig{CACertSecretRef: "corp-ca", CACertSecretKey: "ca.crt"}, + }, + } + + t.Run("no TLS → empty hash", func(t *testing.T) { + kube := fake.NewClientBuilder().WithScheme(scheme).Build() + r := &kagentReconciler{kube: kube} + hash, err := r.computeRemoteMCPServerSecretHash(context.Background(), rmsNoTLS) + require.NoError(t, err) + assert.Empty(t, hash) + }) + + t.Run("missing secret → error", func(t *testing.T) { + kube := fake.NewClientBuilder().WithScheme(scheme).Build() + r := &kagentReconciler{kube: kube} + _, err := r.computeRemoteMCPServerSecretHash(context.Background(), rmsWithTLS) + require.Error(t, err) + }) + + t.Run("missing key in present secret → error", func(t *testing.T) { + // Secret exists but has the wrong key — the operator typo'd + // caCertSecretKey. Without this guard the agent would mount + // the Secret, fail to find the file at startup, and produce a + // FileNotFoundError that doesn't identify the resource owner. + // Surfacing the error on the RMS's Accepted condition gives + // the operator a precise pointer. + wrongKey := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-ca", Namespace: "ns"}, + Data: map[string][]byte{"other.crt": []byte("PEM")}, + } + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(wrongKey).Build() + r := &kagentReconciler{kube: kube} + _, err := r.computeRemoteMCPServerSecretHash(context.Background(), rmsWithTLS) + require.Error(t, err) + assert.Contains(t, err.Error(), "ca.crt") + }) + + t.Run("present secret → stable hex; rotation → different hex", func(t *testing.T) { + kube1 := fake.NewClientBuilder().WithScheme(scheme).WithObjects(caV1.DeepCopy()).Build() + kube2 := fake.NewClientBuilder().WithScheme(scheme).WithObjects(caV2.DeepCopy()).Build() + r1 := &kagentReconciler{kube: kube1} + r2 := &kagentReconciler{kube: kube2} + + h1, err := r1.computeRemoteMCPServerSecretHash(context.Background(), rmsWithTLS) + require.NoError(t, err) + require.NotEmpty(t, h1) + + h1Again, err := r1.computeRemoteMCPServerSecretHash(context.Background(), rmsWithTLS) + require.NoError(t, err) + assert.Equal(t, h1, h1Again, "same Secret content must produce identical hash") + + h2, err := r2.computeRemoteMCPServerSecretHash(context.Background(), rmsWithTLS) + require.NoError(t, err) + assert.NotEqual(t, h1, h2, "rotating the Secret content must change the hash") + }) +} + // TestRemoteMCPRegistrationTimeout verifies that remoteMCPRegistrationTimeout // returns spec.timeout when set and falls back to the package default otherwise. func TestRemoteMCPRegistrationTimeout(t *testing.T) { @@ -717,19 +811,300 @@ func TestNewHTTPClient(t *testing.T) { timeout := 5 * time.Second t.Run("no headers", func(t *testing.T) { - c := newHTTPClient(nil, timeout) + c := newHTTPClient(nil, timeout, nil) assert.Equal(t, timeout, c.Timeout) }) t.Run("empty headers", func(t *testing.T) { - c := newHTTPClient(map[string]string{}, timeout) + c := newHTTPClient(map[string]string{}, timeout, nil) assert.Equal(t, timeout, c.Timeout) }) t.Run("with headers sets timeout and custom transport", func(t *testing.T) { - c := newHTTPClient(map[string]string{"X-Key": "val"}, timeout) + c := newHTTPClient(map[string]string{"X-Key": "val"}, timeout, nil) assert.Equal(t, timeout, c.Timeout) _, ok := c.Transport.(*headerTransport) assert.True(t, ok, "expected headerTransport") }) + + t.Run("with tls config installs cloned transport", func(t *testing.T) { + tlsCfg := &tls.Config{InsecureSkipVerify: true} //nolint:gosec // test only + c := newHTTPClient(nil, timeout, tlsCfg) + require.Equal(t, timeout, c.Timeout) + // No headers → transport is the cloned *http.Transport directly. + tr, ok := c.Transport.(*http.Transport) + require.True(t, ok, "expected *http.Transport when no headers + tls is set") + assert.Same(t, tlsCfg, tr.TLSClientConfig) + }) + + t.Run("with headers and tls config wraps headerTransport over cloned transport", func(t *testing.T) { + tlsCfg := &tls.Config{InsecureSkipVerify: true} //nolint:gosec // test only + c := newHTTPClient(map[string]string{"X-Key": "val"}, timeout, tlsCfg) + ht, ok := c.Transport.(*headerTransport) + require.True(t, ok, "expected headerTransport") + tr, ok := ht.base.(*http.Transport) + require.True(t, ok, "headerTransport.base should be the cloned *http.Transport") + assert.Same(t, tlsCfg, tr.TLSClientConfig) + }) +} + +// TestBuildRemoteMCPServerTLSConfig covers the controller's mirror of the +// agent translator's TLS semantics: tool discovery dials the upstream from +// the controller pod, so it has to construct the same trust chain the agent +// will use at runtime — but from the controller's vantage point (no Secret +// mounted on its filesystem; it has to read the Secret via the kube API). +func TestBuildRemoteMCPServerTLSConfig(t *testing.T) { + caPEM := generateTestCAPEM(t) + + tests := []struct { + name string + spec *v1alpha2.TLSConfig + secret *corev1.Secret + wantNil bool + wantSkip bool + wantCA bool + wantSystem bool + wantErr bool + errContains string + }{ + { + name: "nil spec → no tls config (default transport)", + spec: nil, + wantNil: true, + }, + { + name: "empty struct → no tls config (parity with nil)", + spec: &v1alpha2.TLSConfig{}, + wantNil: true, + }, + { + name: "disableVerify only", + spec: &v1alpha2.TLSConfig{DisableVerify: true}, + wantSkip: true, + }, + { + name: "custom CA only (additive to system pool)", + spec: &v1alpha2.TLSConfig{ + CACertSecretRef: "ca", + CACertSecretKey: "ca.crt", + }, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "ca", Namespace: "ns"}, + Data: map[string][]byte{"ca.crt": caPEM}, + }, + wantCA: true, + wantSystem: true, + }, + { + name: "custom CA with disableSystemCAs (trust only the bundle)", + spec: &v1alpha2.TLSConfig{ + CACertSecretRef: "ca", + CACertSecretKey: "ca.crt", + DisableSystemCAs: true, + }, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "ca", Namespace: "ns"}, + Data: map[string][]byte{"ca.crt": caPEM}, + }, + wantCA: true, + wantSystem: false, + }, + { + name: "missing secret → error", + spec: &v1alpha2.TLSConfig{ + CACertSecretRef: "ca", + CACertSecretKey: "ca.crt", + }, + wantErr: true, + errContains: "failed to read CA secret", + }, + { + name: "secret present but missing key → error", + spec: &v1alpha2.TLSConfig{ + CACertSecretRef: "ca", + CACertSecretKey: "ca.crt", + }, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "ca", Namespace: "ns"}, + Data: map[string][]byte{"other": []byte("x")}, + }, + wantErr: true, + errContains: "does not contain key", + }, + { + name: "secret key contains garbage → error", + spec: &v1alpha2.TLSConfig{ + CACertSecretRef: "ca", + CACertSecretKey: "ca.crt", + }, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "ca", Namespace: "ns"}, + Data: map[string][]byte{"ca.crt": []byte("not a pem")}, + }, + wantErr: true, + errContains: "valid PEM certificates", + }, + // Note: the trust-nothing combination (disableSystemCAs=true alone) + // used to be rejected here at reconcile time. It's now rejected + // earlier by the CEL rule on TLSConfig at admission, so it cannot + // reach this code path. No runtime test needed. + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := clientgoscheme.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + objs := []client.Object{} + if tt.secret != nil { + objs = append(objs, tt.secret) + } + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() + + r := &kagentReconciler{kube: kube} + rms := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "ns"}, + Spec: v1alpha2.RemoteMCPServerSpec{URL: "https://x/y", TLS: tt.spec}, + } + + cfg, err := r.buildRemoteMCPServerTLSConfig(context.Background(), rms) + if tt.wantErr { + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errContains) + return + } + require.NoError(t, err) + + if tt.wantNil { + assert.Nil(t, cfg) + return + } + require.NotNil(t, cfg) + assert.Equal(t, tt.wantSkip, cfg.InsecureSkipVerify) + if tt.wantCA { + require.NotNil(t, cfg.RootCAs, "expected RootCAs populated when CA Secret is referenced") + } + // Asserting "system pool was used" by counting subjects is + // platform-specific — on macOS, x509.SystemCertPool returns + // a minimal pool because Go defers to platform verification. + // The path is structurally enforced (SystemCertPool vs + // NewCertPool) and the strict-trust case (wantSystem=false) + // covers the alternative branch. + _ = tt.wantSystem + }) + } +} + +// stubRewriter is a test double for translator.RemoteMCPServerURLRewriter. +// Records the call and returns the configured response, letting tests +// verify createMcpTransport plumbs the rewriter correctly. +type stubRewriter struct { + called bool + gotRMS *v1alpha2.RemoteMCPServer + respURL string + respErr error +} + +func (s *stubRewriter) RewriteRemoteMCPServerURL(_ context.Context, rms *v1alpha2.RemoteMCPServer) (string, error) { + s.called = true + s.gotRMS = rms + return s.respURL, s.respErr +} + +// TestCreateMcpTransport_URLRewriter covers the optional URL-rewriter +// hook on the reconciler: a configured rewriter must (a) receive the +// RemoteMCPServer, (b) supply the dial-time URL on the produced +// transport, and (c) propagate errors. A nil rewriter must leave +// s.Spec.URL as the dial target. +func TestCreateMcpTransport_URLRewriter(t *testing.T) { + scheme := clientgoscheme.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + const specURL = "https://upstream.example.com/mcp" + rms := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "rms", Namespace: "ns"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "test", + URL: specURL, + }, + } + + t.Run("nil rewriter uses spec.URL verbatim", func(t *testing.T) { + kube := fake.NewClientBuilder().WithScheme(scheme).Build() + r := &kagentReconciler{kube: kube, rmsURLRewriter: nil} + + tsp, err := r.createMcpTransport(context.Background(), rms) + require.NoError(t, err) + require.NotNil(t, tsp) + streamable, ok := tsp.(*mcp.StreamableClientTransport) + require.True(t, ok, "expected Streamable HTTP transport for default protocol") + assert.Equal(t, specURL, streamable.Endpoint) + }) + + t.Run("rewriter substitutes the dial URL", func(t *testing.T) { + const rewrittenURL = "http://upstream.example.com:443/mcp" + stub := &stubRewriter{respURL: rewrittenURL} + + kube := fake.NewClientBuilder().WithScheme(scheme).Build() + r := &kagentReconciler{kube: kube, rmsURLRewriter: stub} + + tsp, err := r.createMcpTransport(context.Background(), rms) + require.NoError(t, err) + require.True(t, stub.called, "rewriter must be called") + require.NotNil(t, stub.gotRMS) + assert.Equal(t, "rms", stub.gotRMS.Name, "rewriter receives the RMS being dialed") + + streamable, ok := tsp.(*mcp.StreamableClientTransport) + require.True(t, ok) + assert.Equal(t, rewrittenURL, streamable.Endpoint, "rewritten URL must become the dial target") + }) + + t.Run("rewriter error propagates and aborts transport construction", func(t *testing.T) { + stub := &stubRewriter{respErr: assert.AnError} + + kube := fake.NewClientBuilder().WithScheme(scheme).Build() + r := &kagentReconciler{kube: kube, rmsURLRewriter: stub} + + tsp, err := r.createMcpTransport(context.Background(), rms) + assert.Nil(t, tsp) + require.Error(t, err) + assert.Contains(t, err.Error(), "rewrite RemoteMCPServer URL") + }) + + t.Run("rewriter applies to SSE transport too", func(t *testing.T) { + const rewrittenURL = "http://upstream.example.com:443/sse" + stub := &stubRewriter{respURL: rewrittenURL} + + sseRMS := rms.DeepCopy() + sseRMS.Spec.Protocol = v1alpha2.RemoteMCPServerProtocolSse + + kube := fake.NewClientBuilder().WithScheme(scheme).Build() + r := &kagentReconciler{kube: kube, rmsURLRewriter: stub} + + tsp, err := r.createMcpTransport(context.Background(), sseRMS) + require.NoError(t, err) + sse, ok := tsp.(*mcp.SSEClientTransport) + require.True(t, ok, "expected SSE transport when protocol is SSE") + assert.Equal(t, rewrittenURL, sse.Endpoint) + }) +} + +// generateTestCAPEM produces a minimal self-signed PEM certificate the +// AppendCertsFromPEM call accepts. The cert isn't valid for any URL — +// these tests only exercise the controller's parse-and-pool path. +func generateTestCAPEM(t *testing.T) []byte { + t.Helper() + priv, err := rsa.GenerateKey(rand.Reader, 2048) + require.NoError(t, err) + tmpl := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "kagent-test-ca"}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(time.Hour), + IsCA: true, + KeyUsage: x509.KeyUsageCertSign, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &priv.PublicKey, priv) + require.NoError(t, err) + return pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}) } diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index 0ee9528f8b..1d8468ce87 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -217,56 +217,108 @@ func (r *adkApiTranslator) GetOwnedResourceTypes() []client.Object { const ( googleCredsVolumeName = "google-creds" - tlsCACertVolumeName = "tls-ca-cert" - tlsCACertMountPath = "/etc/ssl/certs/custom" + tlsCAVolumePrefix = "tls-ca-" + tlsCAMountRoot = "/etc/ssl/certs/custom" + maxDNS1123LabelLen = 63 gdchCredsVolumeName = "gdch-creds" gdchCredsMountPath = "/gdch-creds" ) -// populateTLSFields populates TLS configuration fields in the BaseModel -// from the ModelConfig TLS spec. -func populateTLSFields(baseModel *adk.BaseModel, tlsConfig *v1alpha2.TLSConfig) { - if tlsConfig == nil { - return - } - - // Set TLS configuration fields in BaseModel - baseModel.TLSInsecureSkipVerify = &tlsConfig.DisableVerify - baseModel.TLSDisableSystemCAs = &tlsConfig.DisableSystemCAs +// dns1123LabelRE matches RFC 1123 labels (lowercase alphanumeric + dashes, +// must start and end with alphanumeric). K8s volume names require this +// grammar — but K8s Secret names follow the looser DNS_SUBDOMAIN grammar +// (dots allowed, up to 253 chars), so a literal Secret name like +// `corp.ca` or cert-manager-style `mcp.example.com-tls` would fail volume +// name validation if embedded verbatim. tlsCAPaths hashes the name when +// it would violate this regex (or the length limit) for that reason. +var dns1123LabelRE = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`) + +// tlsCAPaths returns deterministic volume name, mount path, and cert file +// path for the given Secret reference. Per-Secret naming lets multiple TLS +// sources (chat ModelConfig + embedding ModelConfig + RemoteMCPServers) on +// the same agent pod coexist without colliding when their +// modelDeploymentData entries get merged (mergeDeploymentData dedupes by +// (Name, MountPath) for VolumeMounts and by Name for Volumes). +func tlsCAPaths(secretName, key string) (volumeName, mountPath, certPath string) { + candidate := tlsCAVolumePrefix + secretName + id := secretName + if len(candidate) > maxDNS1123LabelLen || !dns1123LabelRE.MatchString(candidate) { + h := sha256.Sum256([]byte(secretName)) + id = hex.EncodeToString(h[:])[:8] + } + volumeName = tlsCAVolumePrefix + id + mountPath = path.Join(tlsCAMountRoot, id) + certPath = path.Join(mountPath, key) + return +} - // Set CA cert path if Secret and key are both specified +// deriveTLSFields turns a v1alpha2.TLSConfig into the three pointer fields +// that every TLS-aware adk wire type carries (BaseModel, +// StreamableHTTPConnectionParams, SseConnectionParams). Returns nils for +// nil or all-zero configs so the caller can assign-through to all three +// fields in a single statement. Emitting explicit `false` booleans on +// an empty struct would flip the Python runtime out of its no-op +// short-circuit and silently swap google-adk's default httpx client for +// kagent's, which has the same SSL behavior but different +// timeout/redirect defaults. +func deriveTLSFields(tlsConfig *v1alpha2.TLSConfig) (insecureSkipVerify *bool, caCertPath *string, disableSystemCAs *bool) { + if tlsConfig.IsEmpty() { + return nil, nil, nil + } + insecureSkipVerify = &tlsConfig.DisableVerify + disableSystemCAs = &tlsConfig.DisableSystemCAs if tlsConfig.CACertSecretRef != "" && tlsConfig.CACertSecretKey != "" { - certPath := fmt.Sprintf("%s/%s", tlsCACertMountPath, tlsConfig.CACertSecretKey) - baseModel.TLSCACertPath = &certPath + _, _, p := tlsCAPaths(tlsConfig.CACertSecretRef, tlsConfig.CACertSecretKey) + caCertPath = &p } + return } -// addTLSConfiguration adds TLS certificate volume mounts to modelDeploymentData -// when TLS configuration is present in the ModelConfig. -// Note: TLS configuration fields are now included in agent config JSON via BaseModel, -// so this function only handles volume mounting. +// addTLSConfiguration mounts a CA Secret as a per-Secret read-only volume on +// modelDeploymentData. Safe to call multiple times for the same agent with +// the same OR different TLSConfigs: +// - different Secrets produce different volume names + paths and accumulate. +// - the same Secret referenced from multiple sources (e.g. several RMSs +// pointing at one shared corp-CA bundle) is idempotent — we skip the +// append if a volume with the same name is already present, because the +// RemoteMCPServer path (translateRemoteMCPServerTarget) appends directly to the +// already-merged modelDeploymentData rather than through +// mergeDeploymentData. +// +// Spec validation (Secret exists, named key present) is the reconciler's +// job for both ModelConfig and RemoteMCPServer — see the TLS branches of +// ReconcileKagentModelConfig and ReconcileKagentRemoteMCPServer. The +// translator trusts that the Status has already surfaced any +// misconfiguration; mounting an absent key here would crash the agent at +// startup, but the operator gets the early signal on the resource's own +// Accepted condition. func addTLSConfiguration(modelDeploymentData *modelDeploymentData, tlsConfig *v1alpha2.TLSConfig) { if tlsConfig == nil { return } - // Add Secret volume mount if both CA certificate Secret and key are specified if tlsConfig.CACertSecretRef != "" && tlsConfig.CACertSecretKey != "" { - // Add volume from Secret + volumeName, mountPath, _ := tlsCAPaths(tlsConfig.CACertSecretRef, tlsConfig.CACertSecretKey) + + for _, v := range modelDeploymentData.Volumes { + if v.Name == volumeName { + return + } + } + modelDeploymentData.Volumes = append(modelDeploymentData.Volumes, corev1.Volume{ - Name: tlsCACertVolumeName, + Name: volumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: tlsConfig.CACertSecretRef, - DefaultMode: new(int32(0444)), // Read-only for all users + DefaultMode: new(int32(0444)), }, }, }) - // Add volume mount modelDeploymentData.VolumeMounts = append(modelDeploymentData.VolumeMounts, corev1.VolumeMount{ - Name: tlsCACertVolumeName, - MountPath: tlsCACertMountPath, + Name: volumeName, + MountPath: mountPath, ReadOnly: true, }) } @@ -371,7 +423,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC }, } // Populate TLS fields in BaseModel - populateTLSFields(&openai.BaseModel, model.Spec.TLS) + openai.TLSInsecureSkipVerify, openai.TLSCACertPath, openai.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) // Populate TokenExchange fields (OpenAI-specific) addTokenExchangeConfiguration(openai, modelDeploymentData, &model.Spec) openai.APIKeyPassthrough = model.Spec.APIKeyPassthrough @@ -429,7 +481,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC }, } // Populate TLS fields in BaseModel - populateTLSFields(&anthropic.BaseModel, model.Spec.TLS) + anthropic.TLSInsecureSkipVerify, anthropic.TLSCACertPath, anthropic.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) anthropic.APIKeyPassthrough = model.Spec.APIKeyPassthrough if model.Spec.Anthropic != nil { @@ -478,7 +530,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC }, } // Populate TLS fields in BaseModel - populateTLSFields(&azureOpenAI.BaseModel, model.Spec.TLS) + azureOpenAI.TLSInsecureSkipVerify, azureOpenAI.TLSCACertPath, azureOpenAI.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) azureOpenAI.APIKeyPassthrough = model.Spec.APIKeyPassthrough return azureOpenAI, modelDeploymentData, secretHashBytes, nil @@ -523,7 +575,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC }, } // Populate TLS fields in BaseModel - populateTLSFields(&gemini.BaseModel, model.Spec.TLS) + gemini.TLSInsecureSkipVerify, gemini.TLSCACertPath, gemini.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) gemini.APIKeyPassthrough = model.Spec.APIKeyPassthrough return gemini, modelDeploymentData, secretHashBytes, nil @@ -564,7 +616,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC }, } // Populate TLS fields in BaseModel - populateTLSFields(&anthropic.BaseModel, model.Spec.TLS) + anthropic.TLSInsecureSkipVerify, anthropic.TLSCACertPath, anthropic.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) anthropic.APIKeyPassthrough = model.Spec.APIKeyPassthrough return anthropic, modelDeploymentData, secretHashBytes, nil @@ -588,7 +640,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC Options: model.Spec.Ollama.Options, } // Populate TLS fields in BaseModel - populateTLSFields(&ollama.BaseModel, model.Spec.TLS) + ollama.TLSInsecureSkipVerify, ollama.TLSCACertPath, ollama.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) ollama.APIKeyPassthrough = model.Spec.APIKeyPassthrough return ollama, modelDeploymentData, secretHashBytes, nil @@ -611,8 +663,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC }, } // Populate TLS fields in BaseModel - populateTLSFields(&gemini.BaseModel, model.Spec.TLS) - + gemini.TLSInsecureSkipVerify, gemini.TLSCACertPath, gemini.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) return gemini, modelDeploymentData, secretHashBytes, nil case v1alpha2.ModelProviderBedrock: if model.Spec.Bedrock == nil { @@ -700,7 +751,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC } // Populate TLS fields in BaseModel - populateTLSFields(&bedrock.BaseModel, model.Spec.TLS) + bedrock.TLSInsecureSkipVerify, bedrock.TLSCACertPath, bedrock.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) bedrock.APIKeyPassthrough = model.Spec.APIKeyPassthrough return bedrock, modelDeploymentData, secretHashBytes, nil @@ -749,7 +800,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC AuthUrl: model.Spec.SAPAICore.AuthURL, } - populateTLSFields(&sapAICore.BaseModel, model.Spec.TLS) + sapAICore.TLSInsecureSkipVerify, sapAICore.TLSCACertPath, sapAICore.TLSDisableSystemCAs = deriveTLSFields(model.Spec.TLS) sapAICore.APIKeyPassthrough = model.Spec.APIKeyPassthrough return sapAICore, modelDeploymentData, secretHashBytes, nil @@ -788,6 +839,7 @@ func (a *adkApiTranslator) translateStreamableHttpTool(ctx context.Context, serv if server.Spec.TerminateOnClose != nil { params.TerminateOnClose = server.Spec.TerminateOnClose } + params.TLSInsecureSkipVerify, params.TLSCACertPath, params.TLSDisableSystemCAs = deriveTLSFields(server.Spec.TLS) return params, nil } @@ -819,10 +871,11 @@ func (a *adkApiTranslator) translateSseHttpTool(ctx context.Context, server *v1a if server.Spec.SseReadTimeout != nil { params.SseReadTimeout = new(server.Spec.SseReadTimeout.Seconds()) } + params.TLSInsecureSkipVerify, params.TLSCACertPath, params.TLSDisableSystemCAs = deriveTLSFields(server.Spec.TLS) return params, nil } -func (a *adkApiTranslator) translateMCPServerTarget(ctx context.Context, agent *adk.AgentConfig, agentNamespace string, toolServer *v1alpha2.McpServerTool, agentHeaders map[string]string, proxyURL string) error { +func (a *adkApiTranslator) translateMCPServerTarget(ctx context.Context, agent *adk.AgentConfig, mdd *modelDeploymentData, agentNamespace string, toolServer *v1alpha2.McpServerTool, agentHeaders map[string]string, proxyURL string) ([]byte, error) { gvk := toolServer.GroupKind() switch gvk { @@ -845,15 +898,15 @@ func (a *adkApiTranslator) translateMCPServerTarget(ctx context.Context, agent * err := a.kube.Get(ctx, mcpServerRef, mcpServer) if err != nil { - return err + return nil, err } remoteMcpServer, err := ConvertMCPServerToRemoteMCPServer(mcpServer) if err != nil { - return err + return nil, err } - return a.translateRemoteMCPServerTarget(ctx, agent, remoteMcpServer, toolServer, agentHeaders, proxyURL) + return a.translateRemoteMCPServerTarget(ctx, agent, mdd, remoteMcpServer, toolServer, agentHeaders, proxyURL) case schema.GroupKind{ Group: "", @@ -869,7 +922,7 @@ func (a *adkApiTranslator) translateMCPServerTarget(ctx context.Context, agent * err := a.kube.Get(ctx, remoteMcpServerRef, remoteMcpServer) if err != nil { - return err + return nil, err } // RemoteMCPServer uses user-supplied URLs, but if the URL points to an internal k8s service, @@ -879,7 +932,7 @@ func (a *adkApiTranslator) translateMCPServerTarget(ctx context.Context, agent * proxyURL = a.globalProxyURL } - return a.translateRemoteMCPServerTarget(ctx, agent, remoteMcpServer, toolServer, agentHeaders, proxyURL) + return a.translateRemoteMCPServerTarget(ctx, agent, mdd, remoteMcpServer, toolServer, agentHeaders, proxyURL) case schema.GroupKind{ Group: "", Kind: "Service", @@ -894,26 +947,26 @@ func (a *adkApiTranslator) translateMCPServerTarget(ctx context.Context, agent * err := a.kube.Get(ctx, svcRef, svc) if err != nil { - return err + return nil, err } remoteMcpServer, err := ConvertServiceToRemoteMCPServer(svc) if err != nil { - return err + return nil, err } - return a.translateRemoteMCPServerTarget(ctx, agent, remoteMcpServer, toolServer, agentHeaders, proxyURL) + return a.translateRemoteMCPServerTarget(ctx, agent, mdd, remoteMcpServer, toolServer, agentHeaders, proxyURL) default: - return fmt.Errorf("unknown tool server type: %s", gvk) + return nil, fmt.Errorf("unknown tool server type: %s", gvk) } } -func (a *adkApiTranslator) translateRemoteMCPServerTarget(ctx context.Context, agent *adk.AgentConfig, remoteMcpServer *v1alpha2.RemoteMCPServer, mcpServerTool *v1alpha2.McpServerTool, agentHeaders map[string]string, proxyURL string) error { +func (a *adkApiTranslator) translateRemoteMCPServerTarget(ctx context.Context, agent *adk.AgentConfig, mdd *modelDeploymentData, remoteMcpServer *v1alpha2.RemoteMCPServer, mcpServerTool *v1alpha2.McpServerTool, agentHeaders map[string]string, proxyURL string) ([]byte, error) { switch remoteMcpServer.Spec.Protocol { case v1alpha2.RemoteMCPServerProtocolSse: tool, err := a.translateSseHttpTool(ctx, remoteMcpServer, agentHeaders, proxyURL) if err != nil { - return err + return nil, err } agent.SseTools = append(agent.SseTools, adk.SseMcpServerConfig{ Params: *tool, @@ -924,7 +977,7 @@ func (a *adkApiTranslator) translateRemoteMCPServerTarget(ctx context.Context, a default: tool, err := a.translateStreamableHttpTool(ctx, remoteMcpServer, agentHeaders, proxyURL) if err != nil { - return err + return nil, err } agent.HttpTools = append(agent.HttpTools, adk.HttpMcpServerConfig{ Params: *tool, @@ -933,7 +986,33 @@ func (a *adkApiTranslator) translateRemoteMCPServerTarget(ctx context.Context, a RequireApproval: mcpServerTool.RequireApproval, }) } - return nil + // Mount the CA Secret on the agent pod when the RemoteMCPServer pins a TLS bundle. + // Converters that synthesize RMSs from in-cluster MCPServer/Service + // references don't set Spec.TLS, so this is a no-op for those. Returns + // the controller-resolved TLS Secret hash so callers can mix it into + // the agent's config hash — that's the signal that drives a rollout + // when the CA Secret rotates in place (same Secret name, new PEM). + if mdd != nil { + addTLSConfiguration(mdd, remoteMcpServer.Spec.TLS) + } + return remoteMCPServerSecretHashBytes(remoteMcpServer), nil +} + +// remoteMCPServerSecretHashBytes returns the hex-decoded bytes of the +// RMS's Status.SecretHash so the agent translator can fold them into the +// agent's config hash. Returns nil (no contribution, no error) when the +// status hash is empty or malformed — the controller is responsible for +// keeping Status.SecretHash in sync, and a transient missing/garbage +// value should not block agent translation. +func remoteMCPServerSecretHashBytes(remoteMcpServer *v1alpha2.RemoteMCPServer) []byte { + if remoteMcpServer == nil || remoteMcpServer.Status.SecretHash == "" { + return nil + } + decoded, err := hex.DecodeString(remoteMcpServer.Status.SecretHash) + if err != nil { + return nil + } + return decoded } // Helper functions @@ -1042,9 +1121,13 @@ func mergeDeploymentData(dst, src *modelDeploymentData) { } } for _, sm := range src.VolumeMounts { + // Dedupe by (Name, MountPath). MountPath-only dedupe would silently + // drop a mount from a different Volume that happened to choose the + // same path; matching on both lets kubelet surface the conflict + // loudly instead. found := false for _, m := range dst.VolumeMounts { - if m.MountPath == sm.MountPath { + if m.Name == sm.Name && m.MountPath == sm.MountPath { found = true break } diff --git a/go/core/internal/controller/translator/agent/compiler.go b/go/core/internal/controller/translator/agent/compiler.go index 0232a859e6..93cb893a58 100644 --- a/go/core/internal/controller/translator/agent/compiler.go +++ b/go/core/internal/controller/translator/agent/compiler.go @@ -252,10 +252,19 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent v1alp switch { case tool.McpServer != nil: - err := a.translateMCPServerTarget(ctx, cfg, agent.GetNamespace(), tool.McpServer, headers, a.globalProxyURL) + toolHashBytes, err := a.translateMCPServerTarget(ctx, cfg, mdd, agent.GetNamespace(), tool.McpServer, headers, a.globalProxyURL) if err != nil { return nil, nil, nil, err } + // Fold the RemoteMCPServer's TLS-Secret hash into the agent + // config hash so an in-place rotation of the RMS CA Secret + // (same Secret name, new PEM) triggers a rollout — the + // agent pod has the old cert cached in memory and would + // otherwise keep dialing with stale trust. Mirrors how + // ModelConfig.Status.SecretHash flows in above. + if len(toolHashBytes) > 0 { + secretHashBytes = append(secretHashBytes, toolHashBytes...) + } case tool.Agent != nil: agentRef := tool.Agent.NamespacedName(agent.GetNamespace()) diff --git a/go/core/internal/controller/translator/agent/remotemcpserver_tls_test.go b/go/core/internal/controller/translator/agent/remotemcpserver_tls_test.go new file mode 100644 index 0000000000..994ce5df16 --- /dev/null +++ b/go/core/internal/controller/translator/agent/remotemcpserver_tls_test.go @@ -0,0 +1,597 @@ +package agent_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + translator "github.com/kagent-dev/kagent/go/core/internal/controller/translator/agent" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + schemev1 "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +// findDeployment extracts the agent Deployment from a translator output bundle. +// Translator output is a heterogeneous slice of K8s objects; the agent pod +// spec lives on the Deployment. +func findDeployment(t *testing.T, outputs *translator.AgentOutputs) *appsv1.Deployment { + t.Helper() + for _, obj := range outputs.Manifest { + if d, ok := obj.(*appsv1.Deployment); ok { + return d + } + } + t.Fatal("Deployment not found in manifest") + return nil +} + +// hasVolumeForSecret returns the volume in the deployment that mounts the +// named Secret, or nil if no such volume is present. Used to assert TLS CA +// Secrets land on the agent pod when RMS.Spec.TLS or ModelConfig.Spec.TLS +// reference them. +func hasVolumeForSecret(dep *appsv1.Deployment, secretName string) *corev1.Volume { + for i, v := range dep.Spec.Template.Spec.Volumes { + if v.Secret != nil && v.Secret.SecretName == secretName { + return &dep.Spec.Template.Spec.Volumes[i] + } + } + return nil +} + +// Test_AdkApiTranslator_RMSTLS_DisableVerify exercises the simplest TLS path +// on a RemoteMCPServer: spec.tls.disableVerify=true with no CA Secret. The +// agent's StreamableHTTP connection params should carry the disable-verify +// flag, and no CA volume should be mounted (nothing to mount). +func Test_AdkApiTranslator_RMSTLS_DisableVerify(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "tls-test"}} + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "model", Namespace: "tls-test"}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + }, + } + rms := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "self-signed-upstream", Namespace: "tls-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "Self-signed test fixture", + URL: "https://upstream.example.com/mcp", + TLS: &v1alpha2.TLSConfig{ + DisableVerify: true, + }, + }, + } + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "agent", Namespace: "tls-test"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "You are an agent", + ModelConfig: "model", + Tools: []*v1alpha2.Tool{{ + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", + ApiGroup: "kagent.dev", + Name: "self-signed-upstream", + }, + }, + }}, + }, + }, + } + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(ns, modelConfig, rms, agent). + Build() + + trans := translator.NewAdkApiTranslator( + kubeClient, + types.NamespacedName{Namespace: "tls-test", Name: "model"}, + nil, "", nil, + ) + outputs, err := translator.TranslateAgent(context.Background(), trans, agent) + require.NoError(t, err) + require.NotNil(t, outputs.Config) + + require.Len(t, outputs.Config.HttpTools, 1, "Expected one Streamable HTTP tool") + params := outputs.Config.HttpTools[0].Params + require.NotNil(t, params.TLSInsecureSkipVerify, "TLSInsecureSkipVerify should be set when spec.tls is present") + assert.True(t, *params.TLSInsecureSkipVerify, "disableVerify should propagate") + assert.Nil(t, params.TLSCACertPath, "No CA path expected without CACertSecretRef") + + // No CA Secret → no extra TLS volume on the deployment. + dep := findDeployment(t, outputs) + for _, v := range dep.Spec.Template.Spec.Volumes { + if v.Secret != nil { + assert.NotContains(t, v.Name, "tls-ca-", "No TLS volume expected when no CACertSecretRef is set") + } + } +} + +// Test_AdkApiTranslator_RMSTLS_CustomCA exercises the production path: an +// RemoteMCPServer pinning a CA Secret produces (a) TLSCACertPath on the wire config so +// the runtime initializes its trust store from disk, and (b) a per-Secret +// read-only volume + mount on the agent pod so the file is actually present. +func Test_AdkApiTranslator_RMSTLS_CustomCA(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "tls-test"}} + caSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-corp-ca", Namespace: "tls-test"}, + Data: map[string][]byte{"ca.crt": []byte("FAKE CA PEM")}, + } + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "model", Namespace: "tls-test"}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + }, + } + rms := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-upstream", Namespace: "tls-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "Corporate-CA upstream", + URL: "https://mcp.corp.internal/mcp", + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: "rms-corp-ca", + CACertSecretKey: "ca.crt", + }, + }, + } + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "agent", Namespace: "tls-test"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "You are an agent", + ModelConfig: "model", + Tools: []*v1alpha2.Tool{{ + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", + ApiGroup: "kagent.dev", + Name: "corp-upstream", + }, + }, + }}, + }, + }, + } + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(ns, caSecret, modelConfig, rms, agent). + Build() + + trans := translator.NewAdkApiTranslator( + kubeClient, + types.NamespacedName{Namespace: "tls-test", Name: "model"}, + nil, "", nil, + ) + outputs, err := translator.TranslateAgent(context.Background(), trans, agent) + require.NoError(t, err) + + require.Len(t, outputs.Config.HttpTools, 1) + params := outputs.Config.HttpTools[0].Params + require.NotNil(t, params.TLSCACertPath, "TLSCACertPath should be set when CA Secret is referenced") + assert.Contains(t, *params.TLSCACertPath, "rms-corp-ca/ca.crt", + "Cert path should embed the Secret name (operator-readable, per-Secret bucket)") + + dep := findDeployment(t, outputs) + v := hasVolumeForSecret(dep, "rms-corp-ca") + require.NotNil(t, v, "Deployment should mount the CA Secret as a volume") + require.NotNil(t, v.Secret.DefaultMode) + assert.Equal(t, int32(0444), *v.Secret.DefaultMode, "CA Secret volume should be read-only 0444") + + // Mount path on the agent container should match the cert path's + // directory and be marked read-only. + var mountFound bool + for _, m := range dep.Spec.Template.Spec.Containers[0].VolumeMounts { + if m.Name == v.Name { + assert.True(t, m.ReadOnly, "CA mount should be read-only") + assert.Contains(t, m.MountPath, "rms-corp-ca") + mountFound = true + } + } + assert.True(t, mountFound, "Agent container should mount the CA volume") +} + +// Test_AdkApiTranslator_RMSTLS_SharedSecretAcrossRMSs guards a regression +// where two RemoteMCPServers in the same agent reference the same CA +// Secret. translateRemoteMCPServerTarget calls addTLSConfiguration once +// per RemoteMCPServer directly on the already-merged modelDeploymentData, so without +// idempotency we'd produce two Volume entries with the same Name and the +// API server would reject the Deployment with `Pod.spec.volumes[N].name: +// Duplicate value`. The "all our internal MCP services share one +// corporate-CA bundle" topology is the realistic shape that triggers it. +func Test_AdkApiTranslator_RMSTLS_SharedSecretAcrossRMSs(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "tls-test"}} + caSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-ca", Namespace: "tls-test"}, + Data: map[string][]byte{"ca.crt": []byte("FAKE CA PEM")}, + } + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "model", Namespace: "tls-test"}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + }, + } + sharedTLS := &v1alpha2.TLSConfig{ + CACertSecretRef: "corp-ca", + CACertSecretKey: "ca.crt", + } + rmsA := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-a", Namespace: "tls-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "RMS A behind corp CA", + URL: "https://a.corp.internal/mcp", + TLS: sharedTLS, + }, + } + rmsB := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-b", Namespace: "tls-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "RMS B behind corp CA", + URL: "https://b.corp.internal/mcp", + TLS: sharedTLS, + }, + } + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "agent", Namespace: "tls-test"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "You are an agent", + ModelConfig: "model", + Tools: []*v1alpha2.Tool{ + { + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", ApiGroup: "kagent.dev", Name: "rms-a", + }, + }, + }, + { + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", ApiGroup: "kagent.dev", Name: "rms-b", + }, + }, + }, + }, + }, + }, + } + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(ns, caSecret, modelConfig, rmsA, rmsB, agent). + Build() + + trans := translator.NewAdkApiTranslator( + kubeClient, + types.NamespacedName{Namespace: "tls-test", Name: "model"}, + nil, "", nil, + ) + outputs, err := translator.TranslateAgent(context.Background(), trans, agent) + require.NoError(t, err) + + dep := findDeployment(t, outputs) + + // Count Volumes that mount the shared Secret. Must be exactly one; + // duplicates would fail kubelet admission. + matchingVolumes := 0 + for _, v := range dep.Spec.Template.Spec.Volumes { + if v.Secret != nil && v.Secret.SecretName == "corp-ca" { + matchingVolumes++ + } + } + assert.Equal(t, 1, matchingVolumes, "Same Secret referenced from multiple RMSs must produce exactly one Volume") + + // Both RMSs' connection params should still point at the same cert + // file — the mount is shared but every transport reads from the + // same on-disk path. + require.Len(t, outputs.Config.HttpTools, 2) + pathA := outputs.Config.HttpTools[0].Params.TLSCACertPath + pathB := outputs.Config.HttpTools[1].Params.TLSCACertPath + require.NotNil(t, pathA) + require.NotNil(t, pathB) + assert.Equal(t, *pathA, *pathB, "Both RMSs sharing a Secret should resolve to the same cert path") +} + +// Test_AdkApiTranslator_RMSTLS_EmptyTLSStructIsNoOp guards against the +// trap where a non-nil but all-zero `Spec.TLS: {}` produces wire fields +// (`tls_insecure_skip_verify: false`, `tls_disable_system_cas: false`) +// that would flip the Python runtime out of its no-op short-circuit and +// silently swap google-adk's default httpx client factory for kagent's. +// Both factories produce the same SSL behavior here but differ on +// timeout / redirect defaults; the principle of least surprise says +// `{}` should be indistinguishable from `nil`. +func Test_AdkApiTranslator_RMSTLS_EmptyTLSStructIsNoOp(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "tls-test"}} + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "model", Namespace: "tls-test"}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + }, + } + rms := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "upstream", Namespace: "tls-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "Upstream with empty TLS struct", + URL: "https://upstream.example.com/mcp", + TLS: &v1alpha2.TLSConfig{}, + }, + } + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "agent", Namespace: "tls-test"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "You are an agent", + ModelConfig: "model", + Tools: []*v1alpha2.Tool{{ + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", ApiGroup: "kagent.dev", Name: "upstream", + }, + }, + }}, + }, + }, + } + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(ns, modelConfig, rms, agent). + Build() + + trans := translator.NewAdkApiTranslator( + kubeClient, + types.NamespacedName{Namespace: "tls-test", Name: "model"}, + nil, "", nil, + ) + outputs, err := translator.TranslateAgent(context.Background(), trans, agent) + require.NoError(t, err) + + require.Len(t, outputs.Config.HttpTools, 1) + params := outputs.Config.HttpTools[0].Params + assert.Nil(t, params.TLSInsecureSkipVerify, + "Empty TLS struct must not emit explicit booleans on the wire") + assert.Nil(t, params.TLSDisableSystemCAs, + "Empty TLS struct must not emit explicit booleans on the wire") + assert.Nil(t, params.TLSCACertPath) +} + +// Test_AdkApiTranslator_RMSTLS_CoexistsWithModelConfigTLS verifies the +// reason the volume-naming rework was necessary: an agent that combines a +// ModelConfig TLS CA with one or more RemoteMCPServer TLS CAs must end up +// with every CA mounted on the pod, not just one. Before per-Secret naming +// the merge dedupe would silently drop later contributors because they all +// declared the same volume name. +func Test_AdkApiTranslator_RMSTLS_CoexistsWithModelConfigTLS(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "tls-test"}} + modelCASecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "model-ca", Namespace: "tls-test"}, + Data: map[string][]byte{"ca.crt": []byte("FAKE MODEL CA PEM")}, + } + rmsACASecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-a-ca", Namespace: "tls-test"}, + Data: map[string][]byte{"ca.crt": []byte("FAKE RMS A CA PEM")}, + } + rmsBCASecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-b-ca", Namespace: "tls-test"}, + Data: map[string][]byte{"ca.crt": []byte("FAKE RMS B CA PEM")}, + } + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "model", Namespace: "tls-test"}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: "model-ca", + CACertSecretKey: "ca.crt", + }, + }, + } + rmsA := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-a", Namespace: "tls-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "RMS A", + URL: "https://a.example.com/mcp", + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: "rms-a-ca", + CACertSecretKey: "ca.crt", + }, + }, + } + rmsB := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-b", Namespace: "tls-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "RMS B", + URL: "https://b.example.com/mcp", + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: "rms-b-ca", + CACertSecretKey: "ca.crt", + }, + }, + } + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "agent", Namespace: "tls-test"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "You are an agent", + ModelConfig: "model", + Tools: []*v1alpha2.Tool{ + { + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", ApiGroup: "kagent.dev", Name: "rms-a", + }, + }, + }, + { + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", ApiGroup: "kagent.dev", Name: "rms-b", + }, + }, + }, + }, + }, + }, + } + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(ns, modelCASecret, rmsACASecret, rmsBCASecret, modelConfig, rmsA, rmsB, agent). + Build() + + trans := translator.NewAdkApiTranslator( + kubeClient, + types.NamespacedName{Namespace: "tls-test", Name: "model"}, + nil, "", nil, + ) + outputs, err := translator.TranslateAgent(context.Background(), trans, agent) + require.NoError(t, err) + + dep := findDeployment(t, outputs) + + // All three Secrets must be mounted on the agent pod. + for _, secretName := range []string{"model-ca", "rms-a-ca", "rms-b-ca"} { + v := hasVolumeForSecret(dep, secretName) + assert.NotNilf(t, v, "Secret %q should be mounted on the agent pod", secretName) + } + + // The three volumes must have distinct names and distinct mount paths + // so the merge dedupe doesn't silently drop any. Volume name + mount + // path are derived deterministically from the Secret name. + tlsVolumeNames := map[string]struct{}{} + tlsMountPaths := map[string]struct{}{} + for _, v := range dep.Spec.Template.Spec.Volumes { + if v.Secret != nil && v.Secret.SecretName != "" { + switch v.Secret.SecretName { + case "model-ca", "rms-a-ca", "rms-b-ca": + tlsVolumeNames[v.Name] = struct{}{} + } + } + } + for _, m := range dep.Spec.Template.Spec.Containers[0].VolumeMounts { + for n := range tlsVolumeNames { + if m.Name == n { + tlsMountPaths[m.MountPath] = struct{}{} + } + } + } + assert.Len(t, tlsVolumeNames, 3, "Three TLS Secrets must produce three distinct volume names") + assert.Len(t, tlsMountPaths, 3, "Three TLS Secrets must produce three distinct mount paths") +} + +// Test_AdkApiTranslator_RMSTLS_SecretHashChangesAgentConfigHash pins the +// cert-rotation behavior: an in-place rotation of the RMS TLS Secret +// (same Secret name, new PEM, status SecretHash flipped by the +// controller) must change the agent's kagent.dev/config-hash so the +// deployment rolls. Without this, agent pods retain the old cert loaded +// at process startup via ssl.create_default_context. +func Test_AdkApiTranslator_RMSTLS_SecretHashChangesAgentConfigHash(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + build := func(rmsHash string) string { + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "rotate-test"}} + caSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-ca", Namespace: "rotate-test"}, + Data: map[string][]byte{"ca.crt": []byte("FAKE CA PEM")}, + } + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "model", Namespace: "rotate-test"}, + Spec: v1alpha2.ModelConfigSpec{Model: "gpt-4o", Provider: v1alpha2.ModelProviderOpenAI}, + } + rms := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-upstream", Namespace: "rotate-test"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "Corp upstream", + URL: "https://mcp.corp.internal/mcp", + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: "corp-ca", + CACertSecretKey: "ca.crt", + }, + }, + Status: v1alpha2.RemoteMCPServerStatus{SecretHash: rmsHash}, + } + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "agent", Namespace: "rotate-test"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "System", + ModelConfig: "model", + Tools: []*v1alpha2.Tool{{ + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + Kind: "RemoteMCPServer", ApiGroup: "kagent.dev", Name: "corp-upstream", + }, + }, + }}, + }, + }, + } + + kube := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(ns, caSecret, modelConfig, rms, agent). + Build() + trans := translator.NewAdkApiTranslator( + kube, + types.NamespacedName{Namespace: "rotate-test", Name: "model"}, + nil, "", nil, + ) + outputs, err := translator.TranslateAgent(context.Background(), trans, agent) + require.NoError(t, err) + dep := findDeployment(t, outputs) + return dep.Spec.Template.Annotations["kagent.dev/config-hash"] + } + + preRotate := build("deadbeef") + postRotate := build("cafef00d") + assert.NotEqual(t, preRotate, postRotate, + "agent config-hash must change when RMS Status.SecretHash rotates") +} diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-custom-ca.json b/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-custom-ca.json index 62e996c083..1c1b2a1502 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-custom-ca.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-custom-ca.json @@ -26,7 +26,7 @@ "max_tokens": 1024, "model": "gpt-4o", "temperature": 0.7, - "tls_ca_cert_path": "/etc/ssl/certs/custom/ca.crt", + "tls_ca_cert_path": "/etc/ssl/certs/custom/custom-ca-cert/ca.crt", "tls_disable_system_cas": false, "tls_insecure_skip_verify": false, "type": "openai" @@ -60,7 +60,7 @@ }, "stringData": { "agent-card.json": "{\"name\":\"tls_agent_with_custom_ca\",\"description\":\"\",\"url\":\"http://tls-agent-with-custom-ca.test:8080\",\"version\":\"\",\"capabilities\":{\"streaming\":true,\"pushNotifications\":false,\"stateTransitionHistory\":true},\"defaultInputModes\":[\"text\"],\"defaultOutputModes\":[\"text\"],\"skills\":[],\"preferredTransport\":\"JSONRPC\"}", - "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"tls_insecure_skip_verify\":false,\"tls_ca_cert_path\":\"/etc/ssl/certs/custom/ca.crt\",\"tls_disable_system_cas\":false,\"base_url\":\"https://internal-litellm.company.com\",\"max_tokens\":1024,\"temperature\":0.7},\"description\":\"\",\"instruction\":\"You are a helpful assistant with custom CA support.\",\"stream\":false}" + "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"tls_insecure_skip_verify\":false,\"tls_ca_cert_path\":\"/etc/ssl/certs/custom/custom-ca-cert/ca.crt\",\"tls_disable_system_cas\":false,\"base_url\":\"https://internal-litellm.company.com\",\"max_tokens\":1024,\"temperature\":0.7},\"description\":\"\",\"instruction\":\"You are a helpful assistant with custom CA support.\",\"stream\":false}" } }, { @@ -129,7 +129,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "7617586729040135348" + "kagent.dev/config-hash": "7742056941063993416" }, "labels": { "app": "kagent", @@ -211,8 +211,8 @@ "name": "config" }, { - "mountPath": "/etc/ssl/certs/custom", - "name": "tls-ca-cert", + "mountPath": "/etc/ssl/certs/custom/custom-ca-cert", + "name": "tls-ca-custom-ca-cert", "readOnly": true }, { @@ -231,7 +231,7 @@ } }, { - "name": "tls-ca-cert", + "name": "tls-ca-custom-ca-cert", "secret": { "defaultMode": 292, "secretName": "custom-ca-cert" diff --git a/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-system-cas-disabled.json b/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-system-cas-disabled.json index 620c29c9f5..331af12705 100644 --- a/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-system-cas-disabled.json +++ b/go/core/internal/controller/translator/agent/testdata/outputs/tls-with-system-cas-disabled.json @@ -26,7 +26,7 @@ "max_tokens": 1024, "model": "gpt-4o", "temperature": 0.7, - "tls_ca_cert_path": "/etc/ssl/certs/custom/ca.crt", + "tls_ca_cert_path": "/etc/ssl/certs/custom/corporate-ca-cert/ca.crt", "tls_disable_system_cas": true, "tls_insecure_skip_verify": false, "type": "openai" @@ -60,7 +60,7 @@ }, "stringData": { "agent-card.json": "{\"name\":\"tls_agent_with_system_cas_disabled\",\"description\":\"\",\"url\":\"http://tls-agent-with-system-cas-disabled.test:8080\",\"version\":\"\",\"capabilities\":{\"streaming\":true,\"pushNotifications\":false,\"stateTransitionHistory\":true},\"defaultInputModes\":[\"text\"],\"defaultOutputModes\":[\"text\"],\"skills\":[],\"preferredTransport\":\"JSONRPC\"}", - "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"tls_insecure_skip_verify\":false,\"tls_ca_cert_path\":\"/etc/ssl/certs/custom/ca.crt\",\"tls_disable_system_cas\":true,\"base_url\":\"https://corp-llm-gateway.internal\",\"max_tokens\":1024,\"temperature\":0.7},\"description\":\"\",\"instruction\":\"You are a helpful assistant in a corporate environment.\",\"stream\":false}" + "config.json": "{\"model\":{\"type\":\"openai\",\"model\":\"gpt-4o\",\"tls_insecure_skip_verify\":false,\"tls_ca_cert_path\":\"/etc/ssl/certs/custom/corporate-ca-cert/ca.crt\",\"tls_disable_system_cas\":true,\"base_url\":\"https://corp-llm-gateway.internal\",\"max_tokens\":1024,\"temperature\":0.7},\"description\":\"\",\"instruction\":\"You are a helpful assistant in a corporate environment.\",\"stream\":false}" } }, { @@ -129,7 +129,7 @@ "template": { "metadata": { "annotations": { - "kagent.dev/config-hash": "1527591075664209989" + "kagent.dev/config-hash": "5192670338632125074" }, "labels": { "app": "kagent", @@ -211,8 +211,8 @@ "name": "config" }, { - "mountPath": "/etc/ssl/certs/custom", - "name": "tls-ca-cert", + "mountPath": "/etc/ssl/certs/custom/corporate-ca-cert", + "name": "tls-ca-corporate-ca-cert", "readOnly": true }, { @@ -231,7 +231,7 @@ } }, { - "name": "tls-ca-cert", + "name": "tls-ca-corporate-ca-cert", "secret": { "defaultMode": 292, "secretName": "corporate-ca-cert" diff --git a/go/core/internal/controller/translator/agent/tls_mounting_test.go b/go/core/internal/controller/translator/agent/tls_mounting_test.go index e203d0167b..2ac688a1ac 100644 --- a/go/core/internal/controller/translator/agent/tls_mounting_test.go +++ b/go/core/internal/controller/translator/agent/tls_mounting_test.go @@ -1,6 +1,7 @@ package agent import ( + "path" "testing" "github.com/kagent-dev/kagent/go/api/v1alpha2" @@ -45,21 +46,19 @@ func Test_addTLSConfiguration_WithCACertSecret(t *testing.T) { addTLSConfiguration(mdd, tlsConfig) - // Verify volume is added - require.Len(t, mdd.Volumes, 1, "Expected 1 volume for TLS cert secret") + wantVolumeName, wantMountPath, _ := tlsCAPaths("internal-ca-cert", "ca.crt") + require.Len(t, mdd.Volumes, 1, "Expected 1 volume for TLS cert secret") volume := mdd.Volumes[0] - assert.Equal(t, tlsCACertVolumeName, volume.Name, "Volume name should match TLS CA cert volume name") + assert.Equal(t, wantVolumeName, volume.Name, "Volume name should be derived from Secret name") require.NotNil(t, volume.Secret, "Expected Secret volume source") assert.Equal(t, "internal-ca-cert", volume.Secret.SecretName, "Secret name should match CACertSecretRef") assert.Equal(t, int32(0444), *volume.Secret.DefaultMode, "DefaultMode should be 0444 for read-only cert") - // Verify volume mount is added require.Len(t, mdd.VolumeMounts, 1, "Expected 1 volume mount for TLS cert") - mount := mdd.VolumeMounts[0] - assert.Equal(t, tlsCACertVolumeName, mount.Name, "Volume mount name should match volume name") - assert.Equal(t, tlsCACertMountPath, mount.MountPath, "Mount path should be TLS cert mount path") + assert.Equal(t, wantVolumeName, mount.Name, "Volume mount name should match volume name") + assert.Equal(t, wantMountPath, mount.MountPath, "Mount path should be derived from Secret name") assert.True(t, mount.ReadOnly, "Volume mount should be read-only") } @@ -88,14 +87,13 @@ func Test_addTLSConfiguration_CustomCertKey(t *testing.T) { addTLSConfiguration(mdd, tlsConfig) - // Verify volume is added - require.Len(t, mdd.Volumes, 1, "Expected 1 volume for TLS cert with custom key") + _, wantMountPath, _ := tlsCAPaths("internal-ca-cert", "custom-ca.pem") - // Verify volume mount is added at the correct path + require.Len(t, mdd.Volumes, 1, "Expected 1 volume for TLS cert with custom key") require.Len(t, mdd.VolumeMounts, 1, "Expected 1 volume mount for TLS cert") mount := mdd.VolumeMounts[0] - assert.Equal(t, tlsCACertMountPath, mount.MountPath, "Mount path should be TLS cert mount path") + assert.Equal(t, wantMountPath, mount.MountPath, "Mount path should be derived from Secret name") } // Test_addTLSConfiguration_DisableSystemCAsFlag verifies no volumes added when no cert secret @@ -142,16 +140,143 @@ func Test_addTLSConfiguration_AllFieldsCombined(t *testing.T) { addTLSConfiguration(mdd, tlsConfig) - // Verify volume and mount + _, wantMountPath, _ := tlsCAPaths("my-ca-bundle", "bundle.crt") + require.Len(t, mdd.Volumes, 1, "Expected 1 volume for combined TLS config") require.Len(t, mdd.VolumeMounts, 1, "Expected 1 volume mount for combined TLS config") - // Verify volume references correct Secret volume := mdd.Volumes[0] require.NotNil(t, volume.Secret, "Expected Secret volume source") assert.Equal(t, "my-ca-bundle", volume.Secret.SecretName, "Secret name should match CACertSecretRef") - // Verify mount path is correct mount := mdd.VolumeMounts[0] - assert.Equal(t, tlsCACertMountPath, mount.MountPath, "Mount path should be TLS cert mount path") + assert.Equal(t, wantMountPath, mount.MountPath, "Mount path should be derived from Secret name") +} + +// Test_addTLSConfiguration_MultipleSecretsDoNotCollide verifies that mounting +// CAs from different Secrets on the same agent produces distinct volumes and +// distinct mount paths so the merge dedupe (mergeDeploymentData) preserves +// both. Without per-Secret naming, the merge would silently drop one because +// it dedupes by volume name and mount path. That matters now that RMSs and +// ModelConfigs can each contribute CA mounts to the same agent pod. +func Test_addTLSConfiguration_MultipleSecretsDoNotCollide(t *testing.T) { + mdd := &modelDeploymentData{} + + addTLSConfiguration(mdd, &v1alpha2.TLSConfig{ + CACertSecretRef: "chat-ca", + CACertSecretKey: "ca.crt", + }) + addTLSConfiguration(mdd, &v1alpha2.TLSConfig{ + CACertSecretRef: "embedding-ca", + CACertSecretKey: "ca.crt", + }) + addTLSConfiguration(mdd, &v1alpha2.TLSConfig{ + CACertSecretRef: "rms-corp-ca", + CACertSecretKey: "ca.crt", + }) + + require.Len(t, mdd.Volumes, 3, "Three distinct Secrets should produce three volumes") + require.Len(t, mdd.VolumeMounts, 3, "Three distinct Secrets should produce three mounts") + + seenNames := map[string]struct{}{} + seenPaths := map[string]struct{}{} + for _, v := range mdd.Volumes { + seenNames[v.Name] = struct{}{} + } + for _, m := range mdd.VolumeMounts { + seenPaths[m.MountPath] = struct{}{} + } + assert.Len(t, seenNames, 3, "Volume names should be distinct per Secret") + assert.Len(t, seenPaths, 3, "Mount paths should be distinct per Secret") +} + +// Test_addTLSConfiguration_SameSecretMergesCleanly verifies that referencing +// the same Secret + key from multiple sources produces identical volume name +// and mount path on each call. The downstream merge dedupes by both, so the +// agent pod ends up with exactly one mount even when several +// ModelConfigs/RMSs share a Secret. +func Test_addTLSConfiguration_SameSecretMergesCleanly(t *testing.T) { + a := &modelDeploymentData{} + b := &modelDeploymentData{} + + addTLSConfiguration(a, &v1alpha2.TLSConfig{ + CACertSecretRef: "shared-ca", + CACertSecretKey: "ca.crt", + }) + addTLSConfiguration(b, &v1alpha2.TLSConfig{ + CACertSecretRef: "shared-ca", + CACertSecretKey: "ca.crt", + }) + + require.Len(t, a.Volumes, 1) + require.Len(t, b.Volumes, 1) + assert.Equal(t, a.Volumes[0].Name, b.Volumes[0].Name, "Same Secret should produce identical volume name") + assert.Equal(t, a.VolumeMounts[0].MountPath, b.VolumeMounts[0].MountPath, "Same Secret should produce identical mount path") +} + +// Test_tlsCAPaths_LongSecretNameHashes verifies that Secret names which would +// overflow the K8s DNS_LABEL 63-char limit on the volume name get hashed. +func Test_tlsCAPaths_LongSecretNameHashes(t *testing.T) { + // 60-char Secret name → "tls-ca-" + 60 = 67 chars > 63. Must hash. + longName := "a-very-long-secret-name-that-just-exceeds-the-volume-limit-x" + require.Len(t, longName, 60) + + volumeName, mountPath, certPath := tlsCAPaths(longName, "ca.crt") + + assert.LessOrEqual(t, len(volumeName), 63, "Volume name must fit DNS_LABEL") + assert.NotContains(t, volumeName, longName, "Long names should be hashed, not embedded literally") + assert.True(t, len(mountPath) > len(tlsCAMountRoot), "Mount path should include hashed component") + assert.Equal(t, certPath, path.Join(mountPath, "ca.crt")) +} + +// Test_tlsCAPaths_ShortSecretNameEmbeds verifies that Secret names that fit +// the volume-name budget appear in the volume name and mount path verbatim, +// for operator readability when inspecting an agent's pod spec. +func Test_tlsCAPaths_ShortSecretNameEmbeds(t *testing.T) { + volumeName, mountPath, certPath := tlsCAPaths("corp-ca", "ca.crt") + + assert.Equal(t, "tls-ca-corp-ca", volumeName) + assert.Equal(t, "/etc/ssl/certs/custom/corp-ca", mountPath) + assert.Equal(t, "/etc/ssl/certs/custom/corp-ca/ca.crt", certPath) +} + +// Test_tlsCAPaths_DottedSecretNameHashes covers the case where the Secret +// name follows DNS_SUBDOMAIN (allowed by K8s for Secret names: dots and +// up to 253 chars) but violates DNS_LABEL (required for volume names: no +// dots, ≤ 63 chars). Common examples: cert-manager output Secrets named +// after the certificate (`mcp.example.com-tls`), or operator-authored +// names that mirror an FQDN. Without charset-aware hashing, the produced +// volume name would embed the dot and the agent Deployment would fail to +// apply with a `spec.template.spec.volumes[N].name: Invalid value` error. +func Test_tlsCAPaths_DottedSecretNameHashes(t *testing.T) { + cases := []string{ + "corp.ca", + "mcp.example.com-tls", + "my.cert.bundle", + } + for _, name := range cases { + t.Run(name, func(t *testing.T) { + volumeName, mountPath, certPath := tlsCAPaths(name, "ca.crt") + assert.NotContains(t, volumeName, ".", + "Volume name must not contain dots (DNS_LABEL) — hash when Secret name does") + assert.Regexp(t, `^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`, volumeName, + "Volume name must satisfy RFC 1123 label") + assert.LessOrEqual(t, len(volumeName), 63, "Volume name must fit DNS_LABEL") + assert.Equal(t, certPath, path.Join(mountPath, "ca.crt")) + }) + } +} + +// Test_tlsCAPaths_Deterministic verifies that two calls with the same input +// produce identical output, including when hashing kicks in. The merge +// dedupe relies on identical volume names + mount paths for the same +// Secret regardless of which call site produced them. +func Test_tlsCAPaths_Deterministic(t *testing.T) { + for _, name := range []string{"short-name", "corp.ca", "a-very-long-secret-name-that-just-exceeds-the-volume-limit-x"} { + v1, m1, c1 := tlsCAPaths(name, "ca.crt") + v2, m2, c2 := tlsCAPaths(name, "ca.crt") + assert.Equal(t, v1, v2, "tlsCAPaths must be deterministic for volume name (%s)", name) + assert.Equal(t, m1, m2, "tlsCAPaths must be deterministic for mount path (%s)", name) + assert.Equal(t, c1, c2, "tlsCAPaths must be deterministic for cert path (%s)", name) + } } diff --git a/go/core/internal/httpserver/handlers/companion_secrets.go b/go/core/internal/httpserver/handlers/companion_secrets.go new file mode 100644 index 0000000000..aa38b7a671 --- /dev/null +++ b/go/core/internal/httpserver/handlers/companion_secrets.go @@ -0,0 +1,184 @@ +package handlers + +import ( + "context" + stderrors "errors" + "fmt" + "maps" + "strings" + + "github.com/go-logr/logr" + api "github.com/kagent-dev/kagent/go/api/httpapi" + "github.com/kagent-dev/kagent/go/core/internal/httpserver/errors" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/validation" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// errInvalidCompanionSecret tags errors that should surface as a 400 Bad +// Request to the API caller (e.g. an existing Secret has the wrong type +// or is owned by a different resource). +var errInvalidCompanionSecret = stderrors.New("invalid companion secret") + +// companionSecretAPIError translates a companion-Secret error into the +// appropriate HTTP API error. errInvalidCompanionSecret indicates +// caller-fixable conditions and surfaces as 400; anything else is an +// internal failure (kube API error, marshal failure, etc.). +func companionSecretAPIError(err error) *errors.APIError { + if stderrors.Is(err, errInvalidCompanionSecret) { + return errors.NewBadRequestError(err.Error(), err) + } + return errors.NewInternalServerError("Failed to create or update companion secrets", err) +} + +// rollbackOwnerOnCompanionSecretFailure deletes the owner resource the +// caller just created when the companion-Secret pass that follows fails. +// Use it to close the partial-failure window where the owner is in K8s +// but its referenced Secrets aren't — the operator's retry of the same +// POST would otherwise hit AlreadyExists on the owner without realizing +// the prior attempt half-succeeded. Best-effort: a delete failure is +// logged via the caller's logger but does not change the outer error; +// the caller already surfaces the companion-Secret error to the client. +func rollbackOwnerOnCompanionSecretFailure(ctx context.Context, kubeClient client.Client, owner client.Object, log logr.Logger) { + if err := kubeClient.Delete(ctx, owner); err != nil && !apierrors.IsNotFound(err) { + log.Error(err, "failed to roll back owner after companion-secret failure", + "kind", owner.GetObjectKind().GroupVersionKind().Kind, + "namespace", owner.GetNamespace(), + "name", owner.GetName()) + } +} + +// validateSecretMaterials checks each SecretMaterial's name and key +// against Kubernetes naming rules. Returns a single error for the first +// invalid material so the caller can return a 400 with a precise reason. +func validateSecretMaterials(secrets []api.SecretMaterial) error { + for _, secret := range secrets { + if errs := validation.IsDNS1123Subdomain(secret.Name); len(errs) > 0 { + return fmt.Errorf("invalid secret name %q: %s", secret.Name, strings.Join(errs, "; ")) + } + if errs := validation.IsConfigMapKey(secret.Key); len(errs) > 0 { + return fmt.Errorf("invalid key %q for secret %q: %s", secret.Key, secret.Name, strings.Join(errs, "; ")) + } + } + return nil +} + +// createOrUpdateCompanionSecrets writes each SecretMaterial as an Opaque +// Secret in the owner's namespace, with an OwnerReference back to the +// owner so K8s garbage collection cleans them up when the parent is +// deleted. Materials grouped by `name` accumulate keys into a single +// Secret object. +// +// When a Secret with the same name already exists, the helper merges +// the new keys into the existing Data. Two safety checks: +// - the existing Secret's type must be Opaque (mismatched types are a +// 400-class error because the caller's payload is incompatible). +// - the existing Secret must already carry an OwnerReference back to +// this owner (a Secret managed by someone else is not safe to +// mutate from a different parent's create/update path). +// +// Caller is responsible for ensuring the owner has been written to the +// API server first (so .GetUID() is populated) — the OwnerReference +// uses the owner's live UID. +func createOrUpdateCompanionSecrets( + ctx context.Context, + kubeClient client.Client, + owner client.Object, + gvk schema.GroupVersionKind, + secrets []api.SecretMaterial, +) error { + // Group secrets by name and key. + secretsByName := map[string]map[string][]byte{} + for _, secret := range secrets { + if _, ok := secretsByName[secret.Name]; !ok { + secretsByName[secret.Name] = map[string][]byte{} + } + secretsByName[secret.Name][secret.Key] = []byte(secret.Value) + } + + namespace := owner.GetNamespace() + for name, data := range secretsByName { + existingSecret := &corev1.Secret{} + err := kubeClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, existingSecret) + if err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to get companion secret %s/%s: %w", namespace, name, err) + } + + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + OwnerReferences: []metav1.OwnerReference{ownerReferenceFor(owner, gvk)}, + }, + Type: corev1.SecretTypeOpaque, + Data: data, + } + if err := kubeClient.Create(ctx, secret); err != nil { + return fmt.Errorf("failed to create companion secret %s/%s: %w", namespace, name, err) + } + continue + } + + if existingSecret.Type != corev1.SecretTypeOpaque { + return fmt.Errorf("%w: companion secret %s/%s must be type %q, got %q", errInvalidCompanionSecret, namespace, name, corev1.SecretTypeOpaque, existingSecret.Type) + } + if !isOwnedBy(existingSecret, owner, gvk) { + return fmt.Errorf("%w: companion secret %s/%s is not managed by %s %s/%s", errInvalidCompanionSecret, namespace, name, gvk.Kind, owner.GetNamespace(), owner.GetName()) + } + + if existingSecret.Data == nil { + existingSecret.Data = map[string][]byte{} + } + maps.Copy(existingSecret.Data, data) + if err := kubeClient.Update(ctx, existingSecret); err != nil { + return fmt.Errorf("failed to update companion secret %s/%s: %w", namespace, name, err) + } + } + + return nil +} + +// ownerReferenceFor returns an OwnerReference pointing at the given +// owner. GVK is taken explicitly rather than from owner's TypeMeta +// because objects roundtripped through client.Get often have empty +// TypeMeta; the caller knows the kind at the call site. +func ownerReferenceFor(owner client.Object, gvk schema.GroupVersionKind) metav1.OwnerReference { + controller := true + return metav1.OwnerReference{ + APIVersion: gvk.GroupVersion().Identifier(), + Kind: gvk.Kind, + Name: owner.GetName(), + UID: owner.GetUID(), + Controller: &controller, + } +} + +// isOwnedBy reports whether the secret carries an OwnerReference back +// to the given owner. Compares APIVersion, Kind, Name, AND UID — the UID +// match is load-bearing because a delete-recreate of an owner with the +// same name issues a fresh UID, and the prior owner's secrets may still +// be visible through K8s GC's deletion delay. Requires a non-empty UID +// on the caller's owner; in production K8s always populates this after +// Create, so a zero UID here means a programming error (calling the +// helper before persisting the owner). +func isOwnedBy(secret *corev1.Secret, owner client.Object, gvk schema.GroupVersionKind) bool { + if owner.GetUID() == "" { + return false + } + for _, ownerRef := range secret.GetOwnerReferences() { + if ownerRef.APIVersion != gvk.GroupVersion().Identifier() || + ownerRef.Kind != gvk.Kind || + ownerRef.Name != owner.GetName() { + continue + } + if ownerRef.UID != owner.GetUID() { + continue + } + return true + } + return false +} diff --git a/go/core/internal/httpserver/handlers/modelconfig.go b/go/core/internal/httpserver/handlers/modelconfig.go index a07c313e90..045a9c3718 100644 --- a/go/core/internal/httpserver/handlers/modelconfig.go +++ b/go/core/internal/httpserver/handlers/modelconfig.go @@ -1,26 +1,22 @@ package handlers import ( - "context" "encoding/json" - stderrors "errors" "fmt" - "maps" "net/http" "strings" - api "github.com/kagent-dev/kagent/go/api/httpapi" "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/internal/httpserver/errors" common "github.com/kagent-dev/kagent/go/core/internal/utils" "github.com/kagent-dev/kagent/go/core/pkg/auth" - corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/validation" "sigs.k8s.io/controller-runtime/pkg/client" ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + + api "github.com/kagent-dev/kagent/go/api/httpapi" ) // ModelConfigHandler handles ModelConfiguration requests @@ -190,8 +186,13 @@ func (h *ModelConfigHandler) HandleCreateModelConfig(w ErrorResponseWriter, r *h } } - if err := createOrUpdateCompanionSecrets(r.Context(), h.KubeClient, modelConfig, req.Secrets); err != nil { + if err := createOrUpdateCompanionSecrets(r.Context(), h.KubeClient, modelConfig, modelConfigGVK, req.Secrets); err != nil { log.Error(err, "Failed to create or update companion secrets") + // Close the partial-failure window: the ModelConfig is in K8s + // but its companion Secrets aren't. The operator's retry would + // otherwise hit AlreadyExists on the ModelConfig without a hint + // that the prior attempt half-succeeded. + rollbackOwnerOnCompanionSecretFailure(r.Context(), h.KubeClient, modelConfig, log) w.RespondWithError(companionSecretAPIError(err)) return } @@ -280,7 +281,7 @@ func (h *ModelConfigHandler) HandleUpdateModelConfig(w ErrorResponseWriter, r *h log.V(1).Info("Successfully updated API key secret") } - if err := createOrUpdateCompanionSecrets(r.Context(), h.KubeClient, modelConfig, req.Secrets); err != nil { + if err := createOrUpdateCompanionSecrets(r.Context(), h.KubeClient, modelConfig, modelConfigGVK, req.Secrets); err != nil { log.Error(err, "Failed to create or update companion secrets") w.RespondWithError(companionSecretAPIError(err)) return @@ -350,107 +351,7 @@ func validateAPIKeySecretRef(apiKeySecret, apiKeySecretKey string, provider v1al return nil } -func validateSecretMaterials(secrets []api.SecretMaterial) error { - for _, secret := range secrets { - if errs := validation.IsDNS1123Subdomain(secret.Name); len(errs) > 0 { - return fmt.Errorf("invalid secret name %q: %s", secret.Name, strings.Join(errs, "; ")) - } - if errs := validation.IsConfigMapKey(secret.Key); len(errs) > 0 { - return fmt.Errorf("invalid key %q for secret %q: %s", secret.Key, secret.Name, strings.Join(errs, "; ")) - } - } - return nil -} - -var errInvalidCompanionSecret = stderrors.New("invalid companion secret") - -// companionSecretAPIError returns an API error for companion secret validation errors. -func companionSecretAPIError(err error) *errors.APIError { - if stderrors.Is(err, errInvalidCompanionSecret) { - return errors.NewBadRequestError(err.Error(), err) - } - return errors.NewInternalServerError("Failed to create or update companion secrets", err) -} - -func createOrUpdateCompanionSecrets(ctx context.Context, kubeClient client.Client, owner *v1alpha2.ModelConfig, secrets []api.SecretMaterial) error { - // Group secrets by name and key. - secretsByName := map[string]map[string][]byte{} - for _, secret := range secrets { - if _, ok := secretsByName[secret.Name]; !ok { - secretsByName[secret.Name] = map[string][]byte{} - } - secretsByName[secret.Name][secret.Key] = []byte(secret.Value) - } - - namespace := owner.GetNamespace() - for name, data := range secretsByName { - existingSecret := &corev1.Secret{} - // Get the existing secret by name and namespace. - err := kubeClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, existingSecret) - if err != nil { - if !apierrors.IsNotFound(err) { - return fmt.Errorf("failed to get companion secret %s/%s: %w", namespace, name, err) - } - - // Create the secret if it doesn't exist. - secret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - OwnerReferences: []metav1.OwnerReference{modelConfigOwnerReference(owner)}, - }, - Type: corev1.SecretTypeOpaque, - Data: data, - } - if err := kubeClient.Create(ctx, secret); err != nil { - return fmt.Errorf("failed to create companion secret %s/%s: %w", namespace, name, err) - } - continue - } - - if existingSecret.Type != corev1.SecretTypeOpaque { - return fmt.Errorf("%w: companion secret %s/%s must be type %q, got %q", errInvalidCompanionSecret, namespace, name, corev1.SecretTypeOpaque, existingSecret.Type) - } - if !isOwnedByModelConfig(existingSecret, owner) { - return fmt.Errorf("%w: companion secret %s/%s is not managed by ModelConfig %s/%s", errInvalidCompanionSecret, namespace, name, owner.GetNamespace(), owner.GetName()) - } - - if existingSecret.Data == nil { - existingSecret.Data = map[string][]byte{} - } - maps.Copy(existingSecret.Data, data) - if err := kubeClient.Update(ctx, existingSecret); err != nil { - return fmt.Errorf("failed to update companion secret %s/%s: %w", namespace, name, err) - } - } - - return nil -} - -// modelConfigOwnerReference returns the owner reference for the model config. -func modelConfigOwnerReference(owner *v1alpha2.ModelConfig) metav1.OwnerReference { - controller := true - return metav1.OwnerReference{ - APIVersion: v1alpha2.GroupVersion.Identifier(), - Kind: "ModelConfig", - Name: owner.GetName(), - UID: owner.GetUID(), - Controller: &controller, - } -} - -// isOwnedByModelConfig checks if the secret is owned by the model config. -func isOwnedByModelConfig(secret *corev1.Secret, owner *v1alpha2.ModelConfig) bool { - for _, ownerRef := range secret.GetOwnerReferences() { - if ownerRef.APIVersion != v1alpha2.GroupVersion.Identifier() || - ownerRef.Kind != "ModelConfig" || - ownerRef.Name != owner.GetName() { - continue - } - if owner.GetUID() != "" && ownerRef.UID != owner.GetUID() { - continue - } - return true - } - return false -} +// modelConfigGVK is passed to companion-secret helpers so the +// OwnerReference and isOwnedBy check use the right Kind for this +// resource. +var modelConfigGVK = v1alpha2.GroupVersion.WithKind("ModelConfig") diff --git a/go/core/internal/httpserver/handlers/toolservers.go b/go/core/internal/httpserver/handlers/toolservers.go index e061b670c0..24cd60ac9c 100644 --- a/go/core/internal/httpserver/handlers/toolservers.go +++ b/go/core/internal/httpserver/handlers/toolservers.go @@ -39,8 +39,29 @@ type ToolServerCreateRequest struct { // MCPServer is used when Type is "MCPServer" MCPServer *v1alpha1.MCPServer `json:"mcpServer,omitempty"` + + // Secrets are optional companion Secrets to create or update + // alongside the ToolServer. Each entry materializes as a key in a + // Kubernetes Secret of type Opaque, owned by the created + // ToolServer so K8s GC cleans them up on delete. Names referenced + // here (e.g. RemoteMCPServer.spec.tls.caCertSecretRef, + // RemoteMCPServer.spec.headersFrom[].valueFrom.secretRef.name, + // MCPServer.spec.secretRefs[].name) must match a Secret described + // in this list when the operator wants the API to materialize the + // content inline. Pre-existing Secrets can also be referenced + // directly without supplying material here. + Secrets []api.SecretMaterial `json:"secrets,omitempty"` } +// remoteMCPServerGVK and mcpServerGVK are passed to the +// companion-secret helpers so the OwnerReference and ownership check +// use the right Kind. kmcp.MCPServer shares the kagent.dev group with +// v1alpha2 (see kmcp/api/v1alpha1/groupversion_info.go). +var ( + remoteMCPServerGVK = v1alpha2.GroupVersion.WithKind("RemoteMCPServer") + mcpServerGVK = v1alpha1.GroupVersion.WithKind("MCPServer") +) + // HandleListToolServers handles GET /api/toolservers requests func (h *ToolServersHandler) HandleListToolServers(w ErrorResponseWriter, r *http.Request) { log := ctrllog.FromContext(r.Context()).WithName("toolservers-handler").WithValues("operation", "list") @@ -113,20 +134,20 @@ func (h *ToolServersHandler) HandleCreateToolServer(w ErrorResponseWriter, r *ht w.RespondWithError(errors.NewBadRequestError("RemoteMCPServer data is required when type is RemoteMCPServer", nil)) return } - h.handleCreateRemoteMCPServer(w, r, toolServerRequest.RemoteMCPServer, log) + h.handleCreateRemoteMCPServer(w, r, toolServerRequest.RemoteMCPServer, toolServerRequest.Secrets, log) case ToolServerTypeMCPServer: if toolServerRequest.MCPServer == nil { w.RespondWithError(errors.NewBadRequestError("MCPServer data is required when type is MCPServer", nil)) return } - h.handleCreateMCPServer(w, r, toolServerRequest.MCPServer, log) + h.handleCreateMCPServer(w, r, toolServerRequest.MCPServer, toolServerRequest.Secrets, log) default: w.RespondWithError(errors.NewBadRequestError(fmt.Sprintf("Invalid tool server type. Must be one of %s", toolServerTypes.Join(", ")), nil)) } } // handleCreateRemoteMCPServer handles the creation of a RemoteMCPServer -func (h *ToolServersHandler) handleCreateRemoteMCPServer(w ErrorResponseWriter, r *http.Request, toolServerRequest *v1alpha2.RemoteMCPServer, log logr.Logger) { +func (h *ToolServersHandler) handleCreateRemoteMCPServer(w ErrorResponseWriter, r *http.Request, toolServerRequest *v1alpha2.RemoteMCPServer, secrets []api.SecretMaterial, log logr.Logger) { if toolServerRequest.Namespace == "" { toolServerRequest.Namespace = common.GetResourceNamespace() } @@ -144,19 +165,42 @@ func (h *ToolServersHandler) handleCreateRemoteMCPServer(w ErrorResponseWriter, "toolServerName", toolRef.Name, "toolServerNamespace", toolRef.Namespace, ) + if err := Check(h.Authorizer, r, auth.Resource{Type: "ToolServer", Name: toolRef.String()}); err != nil { + w.RespondWithError(err) + return + } + + // validateSecretMaterials runs after authz so an unauthorized caller + // gets 403 regardless of payload shape — keeps the error surface + // dependent only on authz, not on request structure. + if err := validateSecretMaterials(secrets); err != nil { + w.RespondWithError(errors.NewBadRequestError(err.Error(), err)) + return + } if err := h.KubeClient.Create(r.Context(), toolServerRequest); err != nil { w.RespondWithError(errors.NewInternalServerError("Failed to create RemoteMCPServer in Kubernetes", err)) return } + if err := createOrUpdateCompanionSecrets(r.Context(), h.KubeClient, toolServerRequest, remoteMCPServerGVK, secrets); err != nil { + log.Error(err, "Failed to create or update companion secrets") + // Close the partial-failure window: the RMS is already in K8s + // but its companion Secrets aren't. Leaving it would force the + // operator to delete-then-retry on a confusing 500 AlreadyExists + // on the next POST. Roll back, surface the original error. + rollbackOwnerOnCompanionSecretFailure(r.Context(), h.KubeClient, toolServerRequest, log) + w.RespondWithError(companionSecretAPIError(err)) + return + } + log.Info("Successfully created RemoteMCPServer") data := api.NewResponse(toolServerRequest, "Successfully created RemoteMCPServer", false) RespondWithJSON(w, http.StatusCreated, data) } // handleCreateMCPServer handles the creation of an MCPServer (stdio-based) -func (h *ToolServersHandler) handleCreateMCPServer(w ErrorResponseWriter, r *http.Request, toolServerRequest *v1alpha1.MCPServer, log logr.Logger) { +func (h *ToolServersHandler) handleCreateMCPServer(w ErrorResponseWriter, r *http.Request, toolServerRequest *v1alpha1.MCPServer, secrets []api.SecretMaterial, log logr.Logger) { if toolServerRequest.Namespace == "" { toolServerRequest.Namespace = common.GetResourceNamespace() } @@ -179,11 +223,23 @@ func (h *ToolServersHandler) handleCreateMCPServer(w ErrorResponseWriter, r *htt return } + if err := validateSecretMaterials(secrets); err != nil { + w.RespondWithError(errors.NewBadRequestError(err.Error(), err)) + return + } + if err := h.KubeClient.Create(r.Context(), toolServerRequest); err != nil { w.RespondWithError(errors.NewInternalServerError("Failed to create MCPServer in Kubernetes", err)) return } + if err := createOrUpdateCompanionSecrets(r.Context(), h.KubeClient, toolServerRequest, mcpServerGVK, secrets); err != nil { + log.Error(err, "Failed to create or update companion secrets") + rollbackOwnerOnCompanionSecretFailure(r.Context(), h.KubeClient, toolServerRequest, log) + w.RespondWithError(companionSecretAPIError(err)) + return + } + log.Info("Successfully created MCPServer") data := api.NewResponse(toolServerRequest, "Successfully created MCPServer", false) RespondWithJSON(w, http.StatusCreated, data) diff --git a/go/core/internal/httpserver/handlers/toolservers_test.go b/go/core/internal/httpserver/handlers/toolservers_test.go index 79d0caadce..07d53a9224 100644 --- a/go/core/internal/httpserver/handlers/toolservers_test.go +++ b/go/core/internal/httpserver/handlers/toolservers_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -27,9 +28,22 @@ import ( "github.com/kagent-dev/kagent/go/core/internal/httpserver/auth" "github.com/kagent-dev/kagent/go/core/internal/httpserver/handlers" common "github.com/kagent-dev/kagent/go/core/internal/utils" + pkgauth "github.com/kagent-dev/kagent/go/core/pkg/auth" "github.com/kagent-dev/kmcp/api/v1alpha1" ) +// denyAuthorizer satisfies pkgauth.Authorizer by refusing every Check. +// Used to pin the authorization gate on the create endpoints: a request +// from an unauthorized caller must surface a 403 BEFORE the handler +// reaches KubeClient.Create or createOrUpdateCompanionSecrets. +type denyAuthorizer struct{} + +func (denyAuthorizer) Check(_ context.Context, _ pkgauth.Principal, _ pkgauth.Verb, _ pkgauth.Resource) error { + return assert.AnError +} + +var _ pkgauth.Authorizer = denyAuthorizer{} + func TestToolServersHandler(t *testing.T) { scheme := runtime.NewScheme() @@ -377,6 +391,324 @@ func TestToolServersHandler(t *testing.T) { require.NotNil(t, responseRecorder.errorReceived) }) + // SecretMaterials companion-Secret support mirrors the ModelConfig + // inline-Secret pattern so operators can create an RMS/MCPServer + // and its referenced Secrets in a single POST without + // pre-creating Secret objects out of band. + t.Run("Success_RemoteMCPServer_WithSecretMaterials_CreatesCASecret", func(t *testing.T) { + handler, kubeClient, _, responseRecorder := setupHandler(t) + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "RemoteMCPServer", + RemoteMCPServer: &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "corp-mcp", Namespace: "default"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "Corp-CA MCP", + URL: "https://mcp.corp.internal/mcp", + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: "corp-ca", + CACertSecretKey: "ca.crt", + }, + }, + }, + Secrets: []api.SecretMaterial{ + {Name: "corp-ca", Key: "ca.crt", Value: "FAKE PEM"}, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "test-user") + + handler.HandleCreateToolServer(responseRecorder, req) + require.Equal(t, http.StatusCreated, responseRecorder.Code) + + // Companion Secret created in the same namespace. + secret := &corev1.Secret{} + err := kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "corp-ca"}, secret) + require.NoError(t, err) + assert.Equal(t, corev1.SecretTypeOpaque, secret.Type) + assert.Equal(t, []byte("FAKE PEM"), secret.Data["ca.crt"]) + + // OwnerReference points back at the RMS so K8s GC cleans it up. + require.Len(t, secret.OwnerReferences, 1) + or := secret.OwnerReferences[0] + assert.Equal(t, "RemoteMCPServer", or.Kind) + assert.Equal(t, "corp-mcp", or.Name) + assert.Equal(t, v1alpha2.GroupVersion.Identifier(), or.APIVersion) + }) + + t.Run("Success_MCPServer_WithSecretMaterials_CreatesEnvSecret", func(t *testing.T) { + handler, kubeClient, _, responseRecorder := setupHandler(t) + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "MCPServer", + MCPServer: &v1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "kmcp-with-creds", Namespace: "default"}, + Spec: v1alpha1.MCPServerSpec{ + Deployment: v1alpha1.MCPServerDeployment{ + Image: "example/kmcp:latest", + Port: 8080, + Cmd: "/bin/serve", + SecretRefs: []corev1.LocalObjectReference{ + {Name: "kmcp-creds"}, + }, + }, + }, + }, + Secrets: []api.SecretMaterial{ + {Name: "kmcp-creds", Key: "API_TOKEN", Value: "shhh"}, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "test-user") + + handler.HandleCreateToolServer(responseRecorder, req) + require.Equal(t, http.StatusCreated, responseRecorder.Code) + + secret := &corev1.Secret{} + err := kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "kmcp-creds"}, secret) + require.NoError(t, err) + assert.Equal(t, []byte("shhh"), secret.Data["API_TOKEN"]) + require.Len(t, secret.OwnerReferences, 1) + or := secret.OwnerReferences[0] + assert.Equal(t, "MCPServer", or.Kind) + assert.Equal(t, "kmcp-with-creds", or.Name) + }) + + t.Run("SecretMaterial_GroupsMultipleKeysIntoSingleSecret", func(t *testing.T) { + handler, kubeClient, _, responseRecorder := setupHandler(t) + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "RemoteMCPServer", + RemoteMCPServer: &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "multi-secret-mcp", Namespace: "default"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "RMS with multi-key Secret", + URL: "https://mcp.corp.internal/mcp", + }, + }, + Secrets: []api.SecretMaterial{ + {Name: "shared", Key: "ca.crt", Value: "PEM"}, + {Name: "shared", Key: "token", Value: "abc"}, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "test-user") + + handler.HandleCreateToolServer(responseRecorder, req) + require.Equal(t, http.StatusCreated, responseRecorder.Code) + + secret := &corev1.Secret{} + err := kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "shared"}, secret) + require.NoError(t, err) + assert.Equal(t, []byte("PEM"), secret.Data["ca.crt"]) + assert.Equal(t, []byte("abc"), secret.Data["token"]) + }) + + t.Run("SecretMaterial_InvalidName_Rejected", func(t *testing.T) { + handler, _, _, responseRecorder := setupHandler(t) + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "RemoteMCPServer", + RemoteMCPServer: &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "rms-invalid-secret", Namespace: "default"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "x", URL: "https://x/y", + }, + }, + Secrets: []api.SecretMaterial{ + {Name: "INVALID NAME WITH SPACES", Key: "ca.crt", Value: "x"}, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "test-user") + + handler.HandleCreateToolServer(responseRecorder, req) + assert.Equal(t, http.StatusBadRequest, responseRecorder.Code) + }) + + t.Run("SecretMaterial_ExistingSecretNotOwned_Rejected", func(t *testing.T) { + handler, kubeClient, _, responseRecorder := setupHandler(t) + + // Pre-create a Secret that isn't owned by any RMS. + preexisting := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "stranger", Namespace: "default"}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"ca.crt": []byte("OLD")}, + } + require.NoError(t, kubeClient.Create(context.Background(), preexisting)) + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "RemoteMCPServer", + RemoteMCPServer: &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "stranger-rms", Namespace: "default"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "x", URL: "https://x/y", + }, + }, + Secrets: []api.SecretMaterial{ + {Name: "stranger", Key: "ca.crt", Value: "NEW"}, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "test-user") + + handler.HandleCreateToolServer(responseRecorder, req) + // The 400 surfaces from companionSecretAPIError when the + // existing Secret isn't already owned by this RMS. + assert.Equal(t, http.StatusBadRequest, responseRecorder.Code) + + // Confirm the unrelated Secret wasn't mutated. + fresh := &corev1.Secret{} + err := kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "stranger"}, fresh) + require.NoError(t, err) + assert.Equal(t, []byte("OLD"), fresh.Data["ca.crt"]) + + // Companion-secret failure must roll back the RMS so the + // operator's retry doesn't hit AlreadyExists. Pins the + // partial-failure fix; without rollback the orphan would + // be readable here. + orphan := &v1alpha2.RemoteMCPServer{} + err = kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "stranger-rms"}, orphan) + assert.True(t, apierrors.IsNotFound(err), + "RMS must be rolled back when companion-secret creation fails; got err=%v", err) + }) + + // CompanionSecretFailure_RollsBackMCPServer pins the symmetric + // rollback behavior on the kmcp MCPServer create path so a + // regression on either branch surfaces in CI. + t.Run("CompanionSecretFailure_RollsBackMCPServer", func(t *testing.T) { + handler, kubeClient, _, responseRecorder := setupHandler(t) + + preexisting := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "stranger-kmcp", Namespace: "default"}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"x": []byte("OLD")}, + } + require.NoError(t, kubeClient.Create(context.Background(), preexisting)) + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "MCPServer", + MCPServer: &v1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "stranger-mcp", Namespace: "default"}, + Spec: v1alpha1.MCPServerSpec{ + Deployment: v1alpha1.MCPServerDeployment{ + Image: "example/kmcp:latest", + Port: 8080, + Cmd: "/bin/serve", + }, + }, + }, + Secrets: []api.SecretMaterial{ + {Name: "stranger-kmcp", Key: "x", Value: "NEW"}, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "test-user") + + handler.HandleCreateToolServer(responseRecorder, req) + assert.Equal(t, http.StatusBadRequest, responseRecorder.Code) + + orphan := &v1alpha1.MCPServer{} + err := kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "stranger-mcp"}, orphan) + assert.True(t, apierrors.IsNotFound(err), + "MCPServer must be rolled back when companion-secret creation fails; got err=%v", err) + }) + + // AuthorizationRequired_RemoteMCPServer pins the authz gate on the + // RMS create path. A caller the authorizer rejects must get a 403 + // AND no RMS, no companion Secret should land in the cluster. + t.Run("AuthorizationRequired_RemoteMCPServer", func(t *testing.T) { + handler, kubeClient, _, responseRecorder := setupHandler(t) + // Swap the Noop authorizer for one that denies every Check. + handler.Authorizer = denyAuthorizer{} + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "RemoteMCPServer", + RemoteMCPServer: &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "denied-rms", Namespace: "default"}, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "should not be created", + URL: "https://x/y", + }, + }, + Secrets: []api.SecretMaterial{ + {Name: "denied-rms-ca", Key: "ca.crt", Value: "PEM"}, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "unauthorized-user") + + handler.HandleCreateToolServer(responseRecorder, req) + + assert.Equal(t, http.StatusForbidden, responseRecorder.Code, + "unauthorized RMS create must surface 403") + // Neither the RMS nor the companion Secret should have been + // created — the authz gate fires before any KubeClient.Create. + rms := &v1alpha2.RemoteMCPServer{} + err := kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "denied-rms"}, rms) + assert.Error(t, err, "denied request must not create the RemoteMCPServer") + secret := &corev1.Secret{} + err = kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "denied-rms-ca"}, secret) + assert.Error(t, err, "denied request must not create the companion Secret") + }) + + // AuthorizationRequired_MCPServer pins the symmetric authz gate + // on the kmcp MCPServer create path, so a regression on either + // branch surfaces in the test suite. + t.Run("AuthorizationRequired_MCPServer", func(t *testing.T) { + handler, kubeClient, _, responseRecorder := setupHandler(t) + handler.Authorizer = denyAuthorizer{} + + reqBody := &handlers.ToolServerCreateRequest{ + Type: "MCPServer", + MCPServer: &v1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "denied-mcp", Namespace: "default"}, + Spec: v1alpha1.MCPServerSpec{ + Deployment: v1alpha1.MCPServerDeployment{ + Image: "example/kmcp:latest", + Port: 8080, + Cmd: "/bin/serve", + }, + }, + }, + } + jsonBody, _ := json.Marshal(reqBody) + req := httptest.NewRequest("POST", "/api/toolservers/", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + req = setUser(req, "unauthorized-user") + + handler.HandleCreateToolServer(responseRecorder, req) + + assert.Equal(t, http.StatusForbidden, responseRecorder.Code, + "unauthorized MCPServer create must surface 403") + mcp := &v1alpha1.MCPServer{} + err := kubeClient.Get(context.Background(), + ctrl_client.ObjectKey{Namespace: "default", Name: "denied-mcp"}, mcp) + assert.Error(t, err, "denied request must not create the MCPServer") + }) + t.Run("ToolServerAlreadyExists", func(t *testing.T) { handler, kubeClient, _, responseRecorder := setupHandler(t) diff --git a/go/core/pkg/app/app.go b/go/core/pkg/app/app.go index d47ab55adf..e3bb2dcc5a 100644 --- a/go/core/pkg/app/app.go +++ b/go/core/pkg/app/app.go @@ -292,6 +292,11 @@ type ExtensionConfig struct { AgentPlugins []agent_translator.TranslatorPlugin MCPServerPlugins []translator.MCPTranslatorPlugin SandboxBackend sandboxbackend.Backend + + // RemoteMCPServerURLRewriter optionally transforms the URL the + // controller dials when discovering tools on a RemoteMCPServer. + // Nil means use the spec.URL verbatim. + RemoteMCPServerURLRewriter translator.RemoteMCPServerURLRewriter } type GetExtensionConfig func(bootstrap BootstrapConfig) (*ExtensionConfig, error) @@ -527,6 +532,7 @@ func Start(getExtensionConfig GetExtensionConfig, migrationRunner MigrationRunne cfg.DefaultModelConfig, watchNamespacesList, extensionCfg.SandboxBackend, + extensionCfg.RemoteMCPServerURLRewriter, ) if err := (&controller.ServiceController{ diff --git a/go/core/pkg/translator/adk_api_translator_types.go b/go/core/pkg/translator/adk_api_translator_types.go index 07fd4268d5..1f431d0305 100644 --- a/go/core/pkg/translator/adk_api_translator_types.go +++ b/go/core/pkg/translator/adk_api_translator_types.go @@ -20,3 +20,21 @@ type TranslatorPlugin interface { ProcessAgent(ctx context.Context, agent v1alpha2.AgentObject, outputs *AgentOutputs) error GetOwnedResourceTypes() []client.Object } + +// RemoteMCPServerURLRewriter optionally transforms the URL the kagent +// controller dials when discovering tools on a RemoteMCPServer. +// +// The reconciler treats the returned string as the dial target. +// Returning the input URL unchanged is the correct behavior when the +// rewriter does not apply. A nil rewriter means the controller dials +// the RemoteMCPServer spec.URL verbatim. +// +// The hook controls the URL only. TLS configuration is built from +// spec.tls independently and applied to the underlying http.Client. +// Implementations that change scheme (e.g. https:// → http:// to route +// through a destination that handles TLS elsewhere) must ensure +// spec.tls remains consistent with the new dial target. The rewriter +// currently cannot supply TLS config of its own. +type RemoteMCPServerURLRewriter interface { + RewriteRemoteMCPServerURL(ctx context.Context, remoteMCPServer *v1alpha2.RemoteMCPServer) (string, error) +} diff --git a/go/core/test/e2e/mocks/invoke_remotemcpserver_tls_agent.json b/go/core/test/e2e/mocks/invoke_remotemcpserver_tls_agent.json new file mode 100644 index 0000000000..5f35559cae --- /dev/null +++ b/go/core/test/e2e/mocks/invoke_remotemcpserver_tls_agent.json @@ -0,0 +1,66 @@ +{ + "openai": [ + { + "name": "initial_request", + "match": { + "match_type": "contains", + "message": { + "content": "add 2 and 3", + "role": "user" + } + }, + "response": { + "id": "chatcmpl-1", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "role": "assistant", + "message": { + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "add_numbers", + "arguments": "{\"a\": 2, \"b\": 3}" + } + } + ] + }, + "finish_reason": "tool_calls" + } + ] + } + }, + { + "name": "add_response", + "match": { + "match_type": "contains", + "message": { + "content": "5", + "role": "tool", + "tool_call_id": "call_1" + } + }, + "response": { + "id": "call_1", + "object": "chat.completion.tool_message", + "created": 1677652288, + "model": "gpt-4.1-mini", + "choices": [ + { + "index": 0, + "message": { + "content": "I have added 2 and 3. The result is 5.", + "role": "assistant" + } + } + ] + } + } + ] +} diff --git a/go/core/test/e2e/remotemcpserver_tls_test.go b/go/core/test/e2e/remotemcpserver_tls_test.go new file mode 100644 index 0000000000..c40331a0b8 --- /dev/null +++ b/go/core/test/e2e/remotemcpserver_tls_test.go @@ -0,0 +1,556 @@ +// E2E coverage for RemoteMCPServer TLS support and the tool-server +// companion-Secret API. These tests stand up a real mockmcp fixture +// (in-process, optionally with TLS) on the test host, install matching +// kagent resources in the cluster, drive an agent invocation through +// the A2A endpoint, and assert on the mockmcp request recorder. +// +// Prerequisites (mirror the existing e2e tests in invoke_api_test.go): +// +// - A kind cluster with kagent installed. +// - `kubectl port-forward -n kagent deployments/kagent-controller 8083` +// (or KAGENT_URL set) so the tests can reach the HTTP API. +// - The cluster must be able to dial the test host on `host.docker.internal` +// (Mac) / `172.17.0.1` (Linux) — same indirection mockllm uses; +// buildK8sURL() in invoke_api_test.go does the translation. + +package e2e_test + +import ( + "bytes" + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/json" + "encoding/pem" + "fmt" + "math/big" + "net" + "net/http" + "os" + "strings" + "testing" + "time" + + "github.com/kagent-dev/kagent/go/api/httpapi" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/internal/httpserver/handlers" + "github.com/kagent-dev/mockmcp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// mockmcpFixture is a started mockmcp.Server plus the host-visible base +// URL agents and the controller should dial. baseURL routes through +// host.docker.internal (Mac) / 172.17.0.1 (Linux) so in-cluster pods can +// reach the test host — same shape as setupMockServer() does for the +// mock LLM. +type mockmcpFixture struct { + server *mockmcp.Server + baseURL string // dial URL from inside the cluster + mcpURL string // dial URL with the /mcp path appended + sseURL string // dial URL with the /sse path appended + caCertPEM []byte // when TLS, the PEM bundle to install as a Secret (nil for plaintext fixtures) + hostBindURL string // local 127.0.0.1 URL for debug logging — NOT the cluster-facing URL +} + +// setupMockMCP starts a mockmcp fixture. When withTLS is true a fresh +// self-signed cert is minted for the duration of the test and returned +// in caCertPEM so the caller can write it into a Secret for the RMS to +// reference. Server is registered with t.Cleanup; callers don't manage +// shutdown. +func setupMockMCP(t *testing.T, withTLS bool, opts mockmcp.Options) *mockmcpFixture { + t.Helper() + + // Bind on all interfaces so the controller pod (inside kind) can + // reach the listener via the kind-network gateway IP. Binding to + // "127.0.0.1:0" rejects connections from outside the loopback + // interface, which is what kind pod traffic looks like after + // routing through the network gateway. Ephemeral port still keeps + // concurrent tests from fighting. + if opts.Addr == "" { + opts.Addr = ":0" + } + opts.RecordRequests = true + + var caPEM []byte + if withTLS { + // Mint a self-signed CA + server cert. SANs must include + // whichever host alias / IP the in-cluster pod ends up + // dialing — that varies by network setup: + // * Mac without override: host.docker.internal (DNS) + // * Linux Docker default bridge: 172.17.0.1 + // * kind on Linux: kind network gateway (often 172.18.0.1) + // — passed via KAGENT_LOCAL_HOST in CI + dnsNames, ips := certSubjectAltNames() + certPEM, keyPEM, ca := generateSelfSignedCert(t, dnsNames, ips) + opts.CertPEM = certPEM + opts.KeyPEM = keyPEM + caPEM = ca + } + + server, err := mockmcp.NewServer(opts) + require.NoError(t, err) + + hostURL, err := server.Start(t.Context()) + require.NoError(t, err) + t.Cleanup(func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _ = server.Stop(ctx) + }) + + // buildK8sURL hard-codes "http://" — fine for the mockllm path it + // was written for, but mockmcp's TLS scenarios serve HTTPS and the + // scheme must round-trip to the controller's dial config. Reuse + // buildK8sURL to pick the right host (kind gateway IP / docker + // alias), then restore the actual scheme mockmcp listened on. + clusterURL := strings.Replace(buildK8sURL(hostURL), "http://", schemeOf(hostURL)+"://", 1) + + return &mockmcpFixture{ + server: server, + baseURL: clusterURL, + mcpURL: clusterURL + mockmcp.MCPPath, + sseURL: clusterURL + mockmcp.SSEPath, + caCertPEM: caPEM, + hostBindURL: hostURL, + } +} + +// schemeOf returns "http" or "https" depending on the URL's prefix. +// Used by setupMockMCP to preserve mockmcp's actual listening scheme +// when buildK8sURL rewrites the host (which it does by string-edit, +// not by url.Parse, so it can't be asked to preserve the scheme +// itself without changing its signature for one caller). +func schemeOf(rawURL string) string { + if strings.HasPrefix(rawURL, "https://") { + return "https" + } + return "http" +} + +// certSubjectAltNames returns the DNS-name and IP SAN lists for the +// mockmcp self-signed cert. Mirrors buildK8sURL's host-selection +// logic so the cert validates the same host the controller actually +// dials. Includes liberal fallbacks (kind default + Docker default) +// so the local-dev path doesn't require an explicit env override. +func certSubjectAltNames() ([]string, []net.IP) { + dns := []string{"localhost", "host.docker.internal"} + ips := []net.IP{ + net.ParseIP("127.0.0.1"), + net.ParseIP("172.17.0.1"), // Docker default bridge + net.ParseIP("172.18.0.1"), // common kind gateway + } + + // KAGENT_LOCAL_HOST is the dynamic gateway IP CI detects from + // `docker network inspect kind`. Include it as a SAN so the + // controller's TLS verification accepts whatever IP the runner + // happens to have allocated. buildK8sURL's OS-default fallbacks + // (host.docker.internal on darwin, 172.17.0.1 on linux) are + // already covered by the static DNS/IP lists above. + if override := os.Getenv("KAGENT_LOCAL_HOST"); override != "" { + if ip := net.ParseIP(override); ip != nil { + ips = append(ips, ip) + } else { + dns = append(dns, override) + } + } + + return dns, ips +} + +// createCASecret writes the CA PEM produced by setupMockMCP into a +// Kubernetes Secret in the kagent namespace under the given name and +// key. Registers for cleanup. Returns the Secret so the caller can +// reference it from RMS.spec.tls. +func createCASecret(t *testing.T, cli client.Client, name, key string, ca []byte) *corev1.Secret { + t.Helper() + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "kagent"}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{key: ca}, + } + require.NoError(t, cli.Create(t.Context(), secret)) + cleanup(t, cli, secret) + return secret +} + +// createRMS creates the RMS in the kagent namespace via the typed +// client and waits for Status.DiscoveredTools to populate — that's the +// signal the controller's tool-discovery code path completed end-to-end +// (including the TLS handshake when applicable). Returns the RMS so the +// caller can assert on Status if needed. +func createRMS(t *testing.T, cli client.Client, spec v1alpha2.RemoteMCPServerSpec) *v1alpha2.RemoteMCPServer { + t.Helper() + rms := &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "test-rms-", + Namespace: "kagent", + }, + Spec: spec, + } + require.NoError(t, cli.Create(t.Context(), rms)) + cleanup(t, cli, rms) + + // Poll Status.DiscoveredTools — populates only after a successful + // ListTools call against the upstream, which is the end-to-end + // signal we want. + pollErr := wait.PollUntilContextTimeout(t.Context(), 2*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) { + fresh := &v1alpha2.RemoteMCPServer{} + if err := cli.Get(ctx, client.ObjectKey{Namespace: rms.Namespace, Name: rms.Name}, fresh); err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, err + } + if len(fresh.Status.DiscoveredTools) > 0 { + *rms = *fresh + return true, nil + } + return false, nil + }) + require.NoError(t, pollErr, "RMS Status.DiscoveredTools did not populate within timeout") + return rms +} + +// waitForAPIDiscoveredTools polls GET /api/toolservers until the named +// RemoteMCPServer's DiscoveredTools list is non-empty, then returns the +// tools. The CRD status field and this HTTP response come from +// different write paths in the reconciler (the field is set by +// setRemoteMCPServerStatusConditions, the API serves rows the +// reconciler persisted via RefreshToolsForServer), so a working CRD +// status doesn't strictly imply the API has caught up — poll instead +// of assuming. +func waitForAPIDiscoveredTools(t *testing.T, namespace, name string) []*v1alpha2.MCPTool { + t.Helper() + ref := namespace + "/" + name + var matched []*v1alpha2.MCPTool + + pollErr := wait.PollUntilContextTimeout(t.Context(), 2*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, kagentURL()+"/api/toolservers", nil) + if err != nil { + return false, err + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return false, nil + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return false, nil + } + var list httpapi.StandardResponse[[]httpapi.ToolServerResponse] + if err := json.NewDecoder(resp.Body).Decode(&list); err != nil { + return false, err + } + for _, ts := range list.Data { + if ts.Ref == ref && len(ts.DiscoveredTools) > 0 { + matched = ts.DiscoveredTools + return true, nil + } + } + return false, nil + }) + require.NoError(t, pollErr, "timed out waiting for tools on /api/toolservers for %s", ref) + return matched +} + +// generateSelfSignedCert mints an ECDSA self-signed certificate scoped +// to the given DNS names + IPs. Returns the certificate PEM, the key +// PEM (both bundled for mockmcp), and a CA PEM identical to the +// certificate (since it's self-signed the cert IS its own trust +// anchor). For more elaborate chains the CA could be separate, but +// self-signed-as-CA is sufficient for what RMS TLS needs to exercise: +// the controller pins a single PEM as RootCAs. +func generateSelfSignedCert(t *testing.T, dnsNames []string, ips []net.IP) (certPEM, keyPEM, caPEM []byte) { + t.Helper() + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + require.NoError(t, err) + + tmpl := &x509.Certificate{ + SerialNumber: big.NewInt(time.Now().UnixNano()), + Subject: pkix.Name{CommonName: "kagent-e2e-mockmcp"}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(24 * time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment | x509.KeyUsageCertSign, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + DNSNames: dnsNames, + IPAddresses: ips, + IsCA: true, + BasicConstraintsValid: true, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key) + require.NoError(t, err) + cert := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}) + + keyDER, err := x509.MarshalPKCS8PrivateKey(key) + require.NoError(t, err) + keyPEM = pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: keyDER}) + + // Self-signed: the cert IS the CA bundle. + return cert, keyPEM, cert +} + +// TestE2E_RMS_PrivateCAUpstream exercises the production path: +// RMS.spec.tls.caCertSecretRef pins a CA Secret, the controller reads +// the Secret to build its tool-discovery http.Client, and the agent +// pod mounts the same Secret to use during invocation. The mockmcp +// fixture is the upstream MCP server; its request recorder lets us +// assert that the controller's ListTools call AND the agent's +// tools/call both reached the fixture over TLS with the right cert +// chain. +func TestE2E_RMS_PrivateCAUpstream(t *testing.T) { + baseURL, stopServer := setupMockServer(t, "mocks/invoke_remotemcpserver_tls_agent.json") + defer stopServer() + + cli := setupK8sClient(t, false) + + mcp := setupMockMCP(t, true, mockmcp.Options{}) + caSecret := createCASecret(t, cli, "e2e-rms-tls-ca", "ca.crt", mcp.caCertPEM) + + rms := createRMS(t, cli, v1alpha2.RemoteMCPServerSpec{ + Description: "Private-CA HTTPS upstream", + URL: mcp.mcpURL, + Protocol: v1alpha2.RemoteMCPServerProtocolStreamableHttp, + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: caSecret.Name, + CACertSecretKey: "ca.crt", + }, + }) + + // Sanity-check the controller's ListTools call landed on mockmcp. + requests := mcp.server.Requests() + assert.NotEmpty(t, requests, "mockmcp should have recorded the controller's ListTools call") + + // DiscoveredTools should include mockmcp's default tool set on the + // CRD status (the K8s API surface). + toolNames := make(map[string]bool) + for _, tool := range rms.Status.DiscoveredTools { + toolNames[tool.Name] = true + } + assert.True(t, toolNames["add_numbers"], "expected add_numbers in Status.DiscoveredTools, got %v", toolNames) + + // And on the kagent HTTP API (what the UI calls). The reconciler + // persists the post-ListTools result to the DB at + // reconciler.go:RefreshToolsForServer, separate from the CRD status + // write — verifying both confirms the controller "shows" tools on + // every surface an operator might look at. + apiTools := waitForAPIDiscoveredTools(t, rms.Namespace, rms.Name) + apiNames := make(map[string]bool) + for _, tool := range apiTools { + apiNames[tool.Name] = true + } + assert.True(t, apiNames["add_numbers"], "expected add_numbers via /api/toolservers, got %v", apiNames) + + // Drive an agent invocation through the A2A endpoint. The mock LLM + // is preprogrammed to issue a tools/call to add_numbers; mockmcp + // answers it via the request recorder. + modelCfg := setupModelConfig(t, cli, baseURL) + agent := setupAgent(t, cli, modelCfg.Name, []*v1alpha2.Tool{{ + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + ApiGroup: "kagent.dev", + Kind: "RemoteMCPServer", + Name: rms.Name, + }, + ToolNames: []string{"add_numbers"}, + }, + }}) + a2aClient := setupA2AClient(t, agent) + + runSyncTest(t, a2aClient, "add 2 and 3", "5", nil) + + // The agent's tools/call should also have reached mockmcp. + postInvoke := mcp.server.Requests() + assert.Greater(t, len(postInvoke), len(requests), + "mockmcp should have recorded additional requests from the agent's tools/call") +} + +// TestE2E_RMS_DisableVerify confirms the test-fixture escape hatch +// works end-to-end: an RMS with disableVerify=true (and no Secret) +// connects successfully to the same self-signed fixture that +// PrivateCAUpstream uses. The point of the test is to prove the +// controller actually skipped verification — if disableVerify +// silently fell back to system trust, the handshake would fail on +// the self-signed cert. +func TestE2E_RMS_DisableVerify(t *testing.T) { + baseURL, stopServer := setupMockServer(t, "mocks/invoke_remotemcpserver_tls_agent.json") + defer stopServer() + + cli := setupK8sClient(t, false) + + mcp := setupMockMCP(t, true, mockmcp.Options{}) + + rms := createRMS(t, cli, v1alpha2.RemoteMCPServerSpec{ + Description: "Skip-verify HTTPS upstream", + URL: mcp.mcpURL, + Protocol: v1alpha2.RemoteMCPServerProtocolStreamableHttp, + TLS: &v1alpha2.TLSConfig{ + DisableVerify: true, + }, + }) + + assert.NotEmpty(t, rms.Status.DiscoveredTools, + "DiscoveredTools should populate even though the upstream cert isn't trusted by system CAs") + + modelCfg := setupModelConfig(t, cli, baseURL) + agent := setupAgent(t, cli, modelCfg.Name, []*v1alpha2.Tool{{ + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + ApiGroup: "kagent.dev", + Kind: "RemoteMCPServer", + Name: rms.Name, + }, + ToolNames: []string{"add_numbers"}, + }, + }}) + a2aClient := setupA2AClient(t, agent) + + runSyncTest(t, a2aClient, "add 2 and 3", "5", nil) +} + +// TestE2E_RMS_SSE_TLS exercises the SSE-transport-with-TLS code path. +// SSE and Streamable HTTP go through different translator functions +// (translateSseHttpTool vs translateStreamableHttpTool) and the Python +// runtime has separate factory-installation logic per transport. Both +// must work against a real /sse endpoint backed by a private CA. +func TestE2E_RMS_SSE_TLS(t *testing.T) { + baseURL, stopServer := setupMockServer(t, "mocks/invoke_remotemcpserver_tls_agent.json") + defer stopServer() + + cli := setupK8sClient(t, false) + + mcp := setupMockMCP(t, true, mockmcp.Options{}) + caSecret := createCASecret(t, cli, "e2e-rms-sse-ca", "ca.crt", mcp.caCertPEM) + + rms := createRMS(t, cli, v1alpha2.RemoteMCPServerSpec{ + Description: "Private-CA SSE upstream", + URL: mcp.sseURL, + Protocol: v1alpha2.RemoteMCPServerProtocolSse, + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: caSecret.Name, + CACertSecretKey: "ca.crt", + }, + }) + + // Find the actual mockmcp request path the controller landed on to + // confirm SSE was used (not Streamable HTTP). The mockmcp.SSEPath + // is the request side; the recorder captures it on every request. + requests := mcp.server.Requests() + require.NotEmpty(t, requests, "mockmcp should have recorded the controller's ListTools call over SSE") + var sawSSE bool + for _, req := range requests { + if req.Path == mockmcp.SSEPath { + sawSSE = true + break + } + } + assert.True(t, sawSSE, "expected at least one request on the SSE path (%s); recorded paths: %v", + mockmcp.SSEPath, recordedPaths(requests)) + + modelCfg := setupModelConfig(t, cli, baseURL) + agent := setupAgent(t, cli, modelCfg.Name, []*v1alpha2.Tool{{ + Type: v1alpha2.ToolProviderType_McpServer, + McpServer: &v1alpha2.McpServerTool{ + TypedReference: v1alpha2.TypedReference{ + ApiGroup: "kagent.dev", + Kind: "RemoteMCPServer", + Name: rms.Name, + }, + ToolNames: []string{"add_numbers"}, + }, + }}) + a2aClient := setupA2AClient(t, agent) + + runSyncTest(t, a2aClient, "add 2 and 3", "5", nil) +} + +// TestE2E_API_ToolServerCompanionSecrets posts a ToolServerCreateRequest +// with inline SecretMaterials and asserts the controller materializes +// both the RMS and the companion Secret in a single round-trip — the +// "one POST" UX equivalent of ModelConfig's existing inline-Secret +// support. Also exercises the OwnerReference: deleting the RMS should +// cascade-delete the Secret via K8s GC. +// +// This test doesn't need mockmcp running; it stops at "did the +// controller create the resources correctly?", which is the API +// contract under test. +func TestE2E_API_ToolServerCompanionSecrets(t *testing.T) { + cli := setupK8sClient(t, true) + + rmsName := fmt.Sprintf("e2e-rms-companion-%d", time.Now().UnixNano()) + caSecretName := rmsName + "-ca" + + body, err := json.Marshal(handlers.ToolServerCreateRequest{ + Type: "RemoteMCPServer", + RemoteMCPServer: &v1alpha2.RemoteMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: rmsName, + Namespace: "kagent", + }, + Spec: v1alpha2.RemoteMCPServerSpec{ + Description: "Companion-Secret integration test", + URL: "https://nowhere.invalid/mcp", + TLS: &v1alpha2.TLSConfig{ + CACertSecretRef: caSecretName, + CACertSecretKey: "ca.crt", + }, + }, + }, + Secrets: []httpapi.SecretMaterial{ + {Name: caSecretName, Key: "ca.crt", Value: "FAKE PEM CONTENT"}, + }, + }) + require.NoError(t, err) + + req, err := http.NewRequestWithContext(t.Context(), "POST", + kagentURL()+"/api/toolservers", bytes.NewReader(body)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, http.StatusCreated, resp.StatusCode, "expected 201 Created on POST /api/toolservers") + + // The RMS should exist in K8s. + rms := &v1alpha2.RemoteMCPServer{} + require.NoError(t, cli.Get(t.Context(), + client.ObjectKey{Namespace: "kagent", Name: rmsName}, rms)) + // Companion-Secret deletion happens via OwnerReference GC when we + // delete the RMS in cleanup; register this for that path. + cleanup(t, cli, rms) + + // The Secret should exist in K8s with the expected content. + secret := &corev1.Secret{} + require.NoError(t, cli.Get(t.Context(), + client.ObjectKey{Namespace: "kagent", Name: caSecretName}, secret)) + assert.Equal(t, []byte("FAKE PEM CONTENT"), secret.Data["ca.crt"]) + + // OwnerReference should point back at the RMS so K8s GC cleans it up. + require.Len(t, secret.OwnerReferences, 1) + or := secret.OwnerReferences[0] + assert.Equal(t, "RemoteMCPServer", or.Kind) + assert.Equal(t, rmsName, or.Name) + assert.Equal(t, v1alpha2.GroupVersion.Identifier(), or.APIVersion) +} + +// recordedPaths is a small helper for assertion failure messages — turns +// the request recorder's snapshot into a human-readable list of paths +// when an expected request doesn't show up. +func recordedPaths(reqs []mockmcp.RecordedRequest) []string { + paths := make([]string, len(reqs)) + for i, req := range reqs { + paths[i] = req.Method + " " + req.Path + } + return paths +} diff --git a/go/go.mod b/go/go.mod index 1b498ee8d8..75591f809a 100644 --- a/go/go.mod +++ b/go/go.mod @@ -66,6 +66,7 @@ require ( github.com/golang/protobuf v1.5.4 github.com/google/jsonschema-go v0.4.3 github.com/jackc/pgx/v5 v5.9.2 + github.com/kagent-dev/mockmcp v0.0.0-20260520211643-dcd475b74085 github.com/ollama/ollama v0.24.0 github.com/testcontainers/testcontainers-go v0.42.0 github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0 diff --git a/go/go.sum b/go/go.sum index 7392185179..670ca537f7 100644 --- a/go/go.sum +++ b/go/go.sum @@ -498,6 +498,8 @@ github.com/kagent-dev/kmcp v0.3.0 h1:CuF8LN6JbPoy75saRzwI5OIcSVZUgiwB0BeWtq+rAO4 github.com/kagent-dev/kmcp v0.3.0/go.mod h1:g7wS/3m2wonRo/1DMwVoHxnilr/urPgV2hwV1DwkwrQ= github.com/kagent-dev/mockllm v0.0.5 h1:mm9Ml3NH6/E/YKVMgMwWYMNsNGkDze6I6TC0ppHZAo8= github.com/kagent-dev/mockllm v0.0.5/go.mod h1:tDLemRsTZa1NdHaDbg3sgFk9cT1QWvMPlBtLVD6I2mA= +github.com/kagent-dev/mockmcp v0.0.0-20260520211643-dcd475b74085 h1:5bYxRc6K6+JiFMYGSJpQ7y18PzTGXUmUtVXY7Fqh0Ew= +github.com/kagent-dev/mockmcp v0.0.0-20260520211643-dcd475b74085/go.mod h1:vTMste7c+XiDOQyhe6iGPKLbMy3chY/h2fL3Dox5fAE= github.com/karamaru-alpha/copyloopvar v1.2.2 h1:yfNQvP9YaGQR7VaWLYcfZUlRP2eo2vhExWKxD/fP6q0= github.com/karamaru-alpha/copyloopvar v1.2.2/go.mod h1:oY4rGZqZ879JkJMtX3RRkcXRkmUvH0x35ykgaKgsgJY= github.com/kisielk/errcheck v1.10.0 h1:Lvs/YAHP24YKg08LA8oDw2z9fJVme090RAXd90S+rrw= diff --git a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml index 00b21b6da0..5aa3eeff71 100644 --- a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml +++ b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml @@ -652,17 +652,18 @@ spec: properties: caCertSecretKey: description: |- - CACertSecretKey is the key within the Secret that contains the CA certificate data. - This field follows the same pattern as APIKeySecretKey. - Required when CACertSecretRef is set (unless DisableVerify is true). + CACertSecretKey is the key within the Secret that contains the + CA certificate data (PEM-encoded). Required when CACertSecretRef + is set (unless DisableVerify is true). type: string caCertSecretRef: description: |- CACertSecretRef is a reference to a Kubernetes Secret containing CA certificate(s) in PEM format. The Secret must be in the same - namespace as the ModelConfig. - When set, the certificate will be used to verify the provider's SSL certificate. - This field follows the same pattern as APIKeySecret. + namespace as the resource referencing it (ModelConfig, + RemoteMCPServer, or any future consumer of TLSConfig). + When set, the certificate will be used to verify the upstream's + SSL certificate. type: string disableSystemCAs: default: false @@ -682,6 +683,20 @@ spec: Production deployments MUST use proper certificates. type: boolean type: object + x-kubernetes-validations: + - message: caCertSecretKey requires caCertSecretRef + rule: '!(has(self.caCertSecretKey) && size(self.caCertSecretKey) + > 0 && (!has(self.caCertSecretRef) || size(self.caCertSecretRef) + == 0))' + - message: caCertSecretRef requires caCertSecretKey + rule: '!(has(self.caCertSecretRef) && size(self.caCertSecretRef) + > 0 && (!has(self.caCertSecretKey) || size(self.caCertSecretKey) + == 0))' + - message: disableSystemCAs requires caCertSecretRef or disableVerify + (trust-nothing config rejects every upstream) + rule: '!(has(self.disableSystemCAs) && self.disableSystemCAs && + (!has(self.disableVerify) || !self.disableVerify) && (!has(self.caCertSecretRef) + || size(self.caCertSecretRef) == 0))' required: - model type: object @@ -719,22 +734,6 @@ spec: rule: '!(has(self.apiKeyPassthrough) && self.apiKeyPassthrough && (self.provider == ''Gemini'' || self.provider == ''GeminiVertexAI'' || self.provider == ''AnthropicVertexAI''))' - - message: caCertSecretKey requires caCertSecretRef - rule: '!(has(self.tls) && has(self.tls.caCertSecretKey) && size(self.tls.caCertSecretKey) - > 0 && (!has(self.tls.caCertSecretRef) || size(self.tls.caCertSecretRef) - == 0))' - - message: caCertSecretKey requires caCertSecretRef (unless disableVerify - is true) - rule: '!(has(self.tls) && (!has(self.tls.disableVerify) || !self.tls.disableVerify) - && has(self.tls.caCertSecretKey) && size(self.tls.caCertSecretKey) - > 0 && (!has(self.tls.caCertSecretRef) || size(self.tls.caCertSecretRef) - == 0))' - - message: caCertSecretRef requires caCertSecretKey (unless disableVerify - is true) - rule: '!(has(self.tls) && (!has(self.tls.disableVerify) || !self.tls.disableVerify) - && has(self.tls.caCertSecretRef) && size(self.tls.caCertSecretRef) - > 0 && (!has(self.tls.caCertSecretKey) || size(self.tls.caCertSecretKey) - == 0))' - message: openAI.tokenExchange requires apiKeySecret (the service account secret) rule: '!(has(self.openAI) && has(self.openAI.tokenExchange) && (!has(self.apiKeySecret) diff --git a/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml b/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml index f8bbcf0b1d..4ff81906eb 100644 --- a/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml +++ b/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml @@ -179,6 +179,64 @@ spec: timeout: default: 30s type: string + tls: + description: |- + TLS configuration for the upstream MCP server connection. + Use this for HTTPS upstreams that present a certificate the agent's + system trust store does not include (corporate CA, self-signed cert + on a test fixture, internal MCP gateway). Reuses the same TLSConfig + type as ModelConfig.spec.tls, with identical semantics: disableVerify + turns off certificate validation entirely, caCertSecretRef + + caCertSecretKey point at a PEM bundle Secret in the same namespace, + and disableSystemCAs trusts only the named bundle. + properties: + caCertSecretKey: + description: |- + CACertSecretKey is the key within the Secret that contains the + CA certificate data (PEM-encoded). Required when CACertSecretRef + is set (unless DisableVerify is true). + type: string + caCertSecretRef: + description: |- + CACertSecretRef is a reference to a Kubernetes Secret containing + CA certificate(s) in PEM format. The Secret must be in the same + namespace as the resource referencing it (ModelConfig, + RemoteMCPServer, or any future consumer of TLSConfig). + When set, the certificate will be used to verify the upstream's + SSL certificate. + type: string + disableSystemCAs: + default: false + description: |- + DisableSystemCAs disables the use of system CA certificates. + When false (default), system CA certificates are used for verification (safe behavior). + When true, only the custom CA from CACertSecretRef is trusted. + This allows strict security policies where only corporate CAs should be trusted. + type: boolean + disableVerify: + default: false + description: |- + DisableVerify disables SSL certificate verification entirely. + When false (default), SSL certificates are verified. + When true, SSL certificate verification is disabled. + WARNING: This should ONLY be used in development/testing environments. + Production deployments MUST use proper certificates. + type: boolean + type: object + x-kubernetes-validations: + - message: caCertSecretKey requires caCertSecretRef + rule: '!(has(self.caCertSecretKey) && size(self.caCertSecretKey) + > 0 && (!has(self.caCertSecretRef) || size(self.caCertSecretRef) + == 0))' + - message: caCertSecretRef requires caCertSecretKey + rule: '!(has(self.caCertSecretRef) && size(self.caCertSecretRef) + > 0 && (!has(self.caCertSecretKey) || size(self.caCertSecretKey) + == 0))' + - message: disableSystemCAs requires caCertSecretRef or disableVerify + (trust-nothing config rejects every upstream) + rule: '!(has(self.disableSystemCAs) && self.disableSystemCAs && + (!has(self.disableVerify) || !self.disableVerify) && (!has(self.caCertSecretRef) + || size(self.caCertSecretRef) == 0))' url: minLength: 1 type: string @@ -263,6 +321,12 @@ spec: Important: Run "make" to regenerate code after modifying this file format: int64 type: integer + secretHash: + description: |- + SecretHash stores a hash of the TLS Secret referenced by spec.tls so + agents that consume this RemoteMCPServer can detect cert rotation and + roll on the next reconcile. Empty when spec.tls.caCertSecretRef is unset. + type: string type: object type: object served: true diff --git a/python/packages/kagent-adk/src/kagent/adk/models/_ssl.py b/python/packages/kagent-adk/src/kagent/adk/models/_ssl.py index ffd0b831eb..a634a58e49 100644 --- a/python/packages/kagent-adk/src/kagent/adk/models/_ssl.py +++ b/python/packages/kagent-adk/src/kagent/adk/models/_ssl.py @@ -175,13 +175,13 @@ def create_ssl_context( >>> # Use only custom CA certificate >>> ctx = create_ssl_context( - ... disable_verify=False, ca_cert_path="/etc/ssl/certs/custom/ca.crt", disable_system_cas=True + ... disable_verify=False, ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", disable_system_cas=True ... ) >>> assert isinstance(ctx, ssl.SSLContext) >>> # Use system CAs plus custom CA >>> ctx = create_ssl_context( - ... disable_verify=False, ca_cert_path="/etc/ssl/certs/custom/ca.crt", disable_system_cas=False + ... disable_verify=False, ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", disable_system_cas=False ... ) >>> assert isinstance(ctx, ssl.SSLContext) """ diff --git a/python/packages/kagent-adk/src/kagent/adk/types.py b/python/packages/kagent-adk/src/kagent/adk/types.py index 5e2f4a97af..ca6da6b4a9 100644 --- a/python/packages/kagent-adk/src/kagent/adk/types.py +++ b/python/packages/kagent-adk/src/kagent/adk/types.py @@ -11,10 +11,11 @@ from google.adk.models.anthropic_llm import Claude as ClaudeLLM from google.adk.models.google_llm import Gemini as GeminiLLM from google.adk.tools.mcp_tool import SseConnectionParams, StreamableHTTPConnectionParams -from pydantic import AliasChoices, BaseModel, Field +from pydantic import AliasChoices, BaseModel, Field, model_validator from kagent.adk._approval import make_approval_callback, strip_confirmation_parts_callback from kagent.adk._mcp_toolset import KAgentMcpToolset +from kagent.adk.models._ssl import create_ssl_context from kagent.adk._remote_a2a_tool import KAgentRemoteA2AToolset from kagent.adk.models._anthropic import KAgentAnthropicLlm from kagent.adk.models._bedrock import KAgentBedrockLlm @@ -141,14 +142,113 @@ def _convert_ollama_options(options: dict[str, str] | None) -> dict[str, Any]: return converted -class HttpMcpServerConfig(BaseModel): +def _build_tls_httpx_client_factory( + *, + disable_verify: bool, + ca_cert_path: str | None, + disable_system_cas: bool, +) -> Callable[..., httpx.AsyncClient]: + """Returns an httpx_client_factory suitable for google-adk MCP transport + connection params. The factory applies the kagent TLS configuration + (disableVerify / caCert / disableSystemCAs) to every httpx.AsyncClient + the MCP session manager constructs. + """ + ssl_ctx = create_ssl_context( + disable_verify=disable_verify, + ca_cert_path=ca_cert_path, + disable_system_cas=disable_system_cas, + ) + + def _factory( + headers: dict[str, str] | None = None, + timeout: httpx.Timeout | None = None, + auth: httpx.Auth | None = None, + ) -> httpx.AsyncClient: + kwargs: dict[str, Any] = { + "follow_redirects": True, + "verify": ssl_ctx, + } + if timeout is not None: + kwargs["timeout"] = timeout + else: + # Mirror google-adk's create_mcp_http_client defaults. + kwargs["timeout"] = httpx.Timeout(30, read=300) + if headers is not None: + kwargs["headers"] = headers + if auth is not None: + kwargs["auth"] = auth + return httpx.AsyncClient(**kwargs) + + return _factory + + +class _McpTlsMixin(BaseModel): + """Shared TLS plumbing for HttpMcpServerConfig / SseMcpServerConfig. + + The kagent controller emits TLS keys nested under `params` on the + wire (so the same JSON schema works for both Go and Python + runtimes); we lift them up at load time and install a TLS-aware + httpx_client_factory on the params instance at toolset construction + time. Lift-into-wire-config + install-into-params keeps google-adk's + upstream types unchanged and avoids exporting our own params + subclasses that would shadow the upstream names. + """ + + tls_insecure_skip_verify: bool | None = None + tls_ca_cert_path: str | None = None + tls_disable_system_cas: bool | None = None + + @model_validator(mode="before") + @classmethod + def _lift_tls_from_params(cls, values: Any) -> Any: + # The kagent controller emits TLS keys nested under `params` so + # the same JSON wire schema works for both Go and Python runtimes; + # lift them to the top level. google-adk's params types use + # pydantic's default extra="ignore", so the duplicate copies that + # remain on `params` are silently dropped at construction. + if isinstance(values, dict) and isinstance(values.get("params"), dict): + params = values["params"] + for key in ("tls_insecure_skip_verify", "tls_ca_cert_path", "tls_disable_system_cas"): + if key in params and key not in values: + values[key] = params[key] + return values + + def _apply_tls_to_params(self, params: Any) -> None: + """Install a TLS-aware httpx_client_factory on the params instance + if any TLS field is set. No-op when no TLS config was supplied so + the upstream google-adk default factory remains in place.""" + if ( + self.tls_insecure_skip_verify is None + and self.tls_ca_cert_path is None + and self.tls_disable_system_cas is None + ): + return + factory = _build_tls_httpx_client_factory( + disable_verify=self.tls_insecure_skip_verify or False, + ca_cert_path=self.tls_ca_cert_path, + disable_system_cas=self.tls_disable_system_cas or False, + ) + if hasattr(params, "httpx_client_factory"): + params.httpx_client_factory = factory + else: + # Older google-adk (< 1.28) lacked httpx_client_factory on + # SseConnectionParams. The pyproject floor forbids this in + # production; warn loudly for stale dev environments. + logger.warning( + "TLS configuration ignored on %s: google-adk does not expose " + "httpx_client_factory on this params type — upgrade to ≥ 1.28.1.", + type(params).__name__, + ) + + +class HttpMcpServerConfig(_McpTlsMixin): params: StreamableHTTPConnectionParams tools: list[str] = Field(default_factory=list) allowed_headers: list[str] | None = None # Headers to forward from A2A request to MCP calls require_approval: list[str] | None = None # Tools requiring human approval before execution -class SseMcpServerConfig(BaseModel): +class SseMcpServerConfig(_McpTlsMixin): params: SseConnectionParams tools: list[str] = Field(default_factory=list) allowed_headers: list[str] | None = None # Headers to forward from A2A request to MCP calls @@ -310,6 +410,10 @@ def to_agent( sts_header_provider = sts_integration.header_provider if self.http_tools: for http_tool in self.http_tools: # add http tools + # Install a TLS-aware httpx_client_factory on the params + # before constructing the toolset, so every MCP session + # the session manager opens trusts the configured CA. + http_tool._apply_tls_to_params(http_tool.params) # Create header provider combining STS and allowed headers for this tool tool_header_provider = create_header_provider( allowed_headers=http_tool.allowed_headers, @@ -326,6 +430,7 @@ def to_agent( tools_requiring_approval.update(http_tool.require_approval) if self.sse_tools: for sse_tool in self.sse_tools: # add sse tools + sse_tool._apply_tls_to_params(sse_tool.params) # Create header provider combining STS and allowed headers for this tool tool_header_provider = create_header_provider( allowed_headers=sse_tool.allowed_headers, diff --git a/python/packages/kagent-adk/tests/unittests/models/test_openai.py b/python/packages/kagent-adk/tests/unittests/models/test_openai.py index 392c2d8a4b..5dba0e52ac 100644 --- a/python/packages/kagent-adk/tests/unittests/models/test_openai.py +++ b/python/packages/kagent-adk/tests/unittests/models/test_openai.py @@ -597,7 +597,7 @@ def test_openai_client_with_custom_ca_certificate(): model="gpt-3.5-turbo", type="openai", api_key="fake", - tls_ca_cert_path="/etc/ssl/certs/custom/ca.crt", + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", tls_disable_system_cas=False, ) @@ -607,7 +607,7 @@ def test_openai_client_with_custom_ca_certificate(): # Verify create_ssl_context was called with correct parameters mock_create_ssl.assert_called_once_with( disable_verify=False, - ca_cert_path="/etc/ssl/certs/custom/ca.crt", + ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", disable_system_cas=False, ) @@ -633,7 +633,7 @@ def test_openai_client_with_custom_ca_only(): model="gpt-3.5-turbo", type="openai", api_key="fake", - tls_ca_cert_path="/etc/ssl/certs/custom/ca.crt", + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", tls_disable_system_cas=True, ) @@ -643,7 +643,7 @@ def test_openai_client_with_custom_ca_only(): # Verify create_ssl_context was called with disable_system_cas=True mock_create_ssl.assert_called_once_with( disable_verify=False, - ca_cert_path="/etc/ssl/certs/custom/ca.crt", + ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", disable_system_cas=True, ) @@ -691,7 +691,7 @@ def test_azure_openai_client_with_tls(): api_key="fake", azure_endpoint="https://test.openai.azure.com", api_version="2024-02-15-preview", - tls_ca_cert_path="/etc/ssl/certs/custom/ca.crt", + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", ) # Access _client to trigger client creation @@ -700,7 +700,7 @@ def test_azure_openai_client_with_tls(): # Verify SSL context was created mock_create_ssl.assert_called_once_with( disable_verify=False, - ca_cert_path="/etc/ssl/certs/custom/ca.crt", + ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", disable_system_cas=False, ) @@ -732,7 +732,7 @@ def test_openai_client_with_base_url_and_tls(): type="openai", api_key="fake", base_url="https://litellm.internal.corp:8080", - tls_ca_cert_path="/etc/ssl/certs/custom/ca.crt", + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", ) # Access _client to trigger client creation diff --git a/python/packages/kagent-adk/tests/unittests/models/test_tls_e2e.py b/python/packages/kagent-adk/tests/unittests/models/test_tls_e2e.py index d0af98388e..edd7ca6b00 100644 --- a/python/packages/kagent-adk/tests/unittests/models/test_tls_e2e.py +++ b/python/packages/kagent-adk/tests/unittests/models/test_tls_e2e.py @@ -549,7 +549,7 @@ async def test_e2e_ssl_error_contains_troubleshooting_info(): # Generate troubleshooting message message = get_ssl_troubleshooting_message( error=error, - ca_cert_path="/etc/ssl/certs/custom/ca.crt", + ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", server_url="localhost:8443", ) @@ -559,7 +559,7 @@ async def test_e2e_ssl_error_contains_troubleshooting_info(): assert "kubectl exec" in message assert "openssl x509" in message assert "openssl s_client" in message - assert "/etc/ssl/certs/custom/ca.crt" in message + assert "/etc/ssl/certs/custom/corp-ca/ca.crt" in message assert "localhost:8443" in message assert "kagent.dev/docs" in message diff --git a/python/packages/kagent-adk/tests/unittests/test_mcp_tls.py b/python/packages/kagent-adk/tests/unittests/test_mcp_tls.py new file mode 100644 index 0000000000..915db3eab2 --- /dev/null +++ b/python/packages/kagent-adk/tests/unittests/test_mcp_tls.py @@ -0,0 +1,222 @@ +"""Unit tests for the kagent TLS wiring on MCP server wire configs. + +The kagent controller emits TLS keys nested under ``params`` on the +wire (matching the Go runtime's shape). kagent-adk lifts those keys up +to the wire-config level via a pydantic ``model_validator(mode="before")`` +and installs a TLS-aware ``httpx_client_factory`` on the underlying +google-adk params instance at toolset construction time, so every MCP +session the manager opens applies the operator's TLS config. + +These tests exercise the lift and the factory installation without +standing up a real MCP server. +""" + +from unittest import mock + +import httpx +import pytest +from google.adk.tools.mcp_tool import SseConnectionParams, StreamableHTTPConnectionParams + +from kagent.adk.types import HttpMcpServerConfig, SseMcpServerConfig + + +def test_lifts_tls_fields_from_nested_params_on_streamable(): + """The controller emits ``{"params": {"url": ..., "tls_ca_cert_path": ...}}``; + the pre-validator lifts the TLS keys onto the wire-config object so + the runtime can read them without digging into google-adk's params + type (whose ``extra="ignore"`` would silently drop them).""" + cfg = HttpMcpServerConfig.model_validate( + { + "params": { + "url": "https://upstream.example.com/mcp", + "tls_insecure_skip_verify": True, + "tls_ca_cert_path": "/etc/ssl/certs/custom/corp-ca/ca.crt", + "tls_disable_system_cas": False, + }, + "tools": ["t1"], + } + ) + assert cfg.tls_insecure_skip_verify is True + assert cfg.tls_ca_cert_path == "/etc/ssl/certs/custom/corp-ca/ca.crt" + assert cfg.tls_disable_system_cas is False + + +def test_lifts_tls_fields_from_nested_params_on_sse(): + """SSE wire-config gets the same lift behavior as Streamable HTTP so + operators don't need to know which transport is in use.""" + cfg = SseMcpServerConfig.model_validate( + { + "params": { + "url": "https://upstream.example.com/sse", + "tls_insecure_skip_verify": False, + "tls_ca_cert_path": "/etc/ssl/certs/custom/corp-ca/ca.crt", + }, + } + ) + assert cfg.tls_insecure_skip_verify is False + assert cfg.tls_ca_cert_path == "/etc/ssl/certs/custom/corp-ca/ca.crt" + + +def test_lift_respects_explicit_top_level_value(): + """When the wire config already carries an explicit top-level TLS + value, the nested-params lift must not override it. Production never + emits both, but a Python caller building configs directly might.""" + cfg = HttpMcpServerConfig.model_validate( + { + "params": { + "url": "https://upstream.example.com/mcp", + "tls_ca_cert_path": "/from/params", + }, + "tools": [], + "tls_ca_cert_path": "/from/top-level", + } + ) + assert cfg.tls_ca_cert_path == "/from/top-level" + + +def test_no_tls_config_leaves_factory_default(): + """When no TLS fields are set, ``_apply_tls_to_params`` is a no-op + so google-adk's upstream default factory remains in place. This is + the common case for any RemoteMCPServer without ``spec.tls`` set.""" + params = StreamableHTTPConnectionParams(url="https://upstream.example.com/mcp") + original_factory = params.httpx_client_factory + + cfg = HttpMcpServerConfig(params=params, tools=[]) + cfg._apply_tls_to_params(cfg.params) + + assert cfg.params.httpx_client_factory is original_factory + + +def test_disable_verify_installs_factory_with_verify_false(): + """``tls_insecure_skip_verify=True`` is the test-fixture escape hatch. + The factory it installs must hand httpx ``verify=False`` so self-signed + upstreams accept the connection.""" + params = StreamableHTTPConnectionParams(url="https://upstream.example.com/mcp") + cfg = HttpMcpServerConfig( + params=params, + tools=[], + tls_insecure_skip_verify=True, + ) + + with mock.patch("kagent.adk.types.create_ssl_context") as mock_create: + mock_create.return_value = False # httpx accepts False to disable + cfg._apply_tls_to_params(cfg.params) + + mock_create.assert_called_once_with( + disable_verify=True, + ca_cert_path=None, + disable_system_cas=False, + ) + + # Calling the installed factory should produce an httpx.AsyncClient + # configured with the SSL context returned by create_ssl_context. + with mock.patch("kagent.adk.types.httpx.AsyncClient") as mock_client: + cfg.params.httpx_client_factory() + kwargs = mock_client.call_args[1] + assert kwargs["verify"] is False + + +def test_custom_ca_path_installs_factory_with_ssl_context(): + """The production case: ``tls_ca_cert_path`` pins a corporate CA. The + factory must call ``create_ssl_context`` with the same path so the + resulting SSL context trusts that CA.""" + params = StreamableHTTPConnectionParams(url="https://mcp.corp.internal/mcp") + cfg = HttpMcpServerConfig( + params=params, + tools=[], + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", + ) + + fake_ctx = object() # stand-in for an ssl.SSLContext + with mock.patch("kagent.adk.types.create_ssl_context") as mock_create: + mock_create.return_value = fake_ctx + cfg._apply_tls_to_params(cfg.params) + + mock_create.assert_called_once_with( + disable_verify=False, + ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", + disable_system_cas=False, + ) + + with mock.patch("kagent.adk.types.httpx.AsyncClient") as mock_client: + cfg.params.httpx_client_factory() + kwargs = mock_client.call_args[1] + assert kwargs["verify"] is fake_ctx + # Caller defaults preserved: follow_redirects + timeout matching + # google-adk's create_mcp_http_client defaults. + assert kwargs["follow_redirects"] is True + assert isinstance(kwargs["timeout"], httpx.Timeout) + + +def test_disable_system_cas_propagates_to_create_ssl_context(): + """``tls_disable_system_cas=True`` is the strict-trust mode: the SSL + context should trust ONLY the supplied CA. The wire-config must pass + the flag through to ``create_ssl_context`` without rewriting.""" + params = StreamableHTTPConnectionParams(url="https://mcp.corp.internal/mcp") + cfg = HttpMcpServerConfig( + params=params, + tools=[], + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", + tls_disable_system_cas=True, + ) + + with mock.patch("kagent.adk.types.create_ssl_context") as mock_create: + cfg._apply_tls_to_params(cfg.params) + + mock_create.assert_called_once_with( + disable_verify=False, + ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", + disable_system_cas=True, + ) + + +def test_factory_forwards_caller_kwargs(): + """google-adk's MCP session manager invokes the factory with optional + headers/timeout/auth on a per-session basis. The TLS-wrapping factory + must forward those kwargs so per-session overrides still work.""" + params = StreamableHTTPConnectionParams(url="https://mcp.corp.internal/mcp") + cfg = HttpMcpServerConfig( + params=params, + tools=[], + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", + ) + with mock.patch("kagent.adk.types.create_ssl_context", return_value=object()): + cfg._apply_tls_to_params(cfg.params) + + auth = httpx.BasicAuth(username="u", password="p") + timeout = httpx.Timeout(60) + with mock.patch("kagent.adk.types.httpx.AsyncClient") as mock_client: + cfg.params.httpx_client_factory( + headers={"X-Token": "abc"}, + timeout=timeout, + auth=auth, + ) + + kwargs = mock_client.call_args[1] + assert kwargs["headers"] == {"X-Token": "abc"} + assert kwargs["timeout"] is timeout + assert kwargs["auth"] is auth + + +def test_sse_params_get_factory_when_supported(): + """SSE transport should receive the same factory treatment when the + installed google-adk version is recent enough to expose + ``httpx_client_factory`` on ``SseConnectionParams`` (≥ 1.28.1, the + kagent-adk pyproject floor).""" + params = SseConnectionParams(url="https://upstream.example.com/sse") + cfg = SseMcpServerConfig( + params=params, + tools=[], + tls_ca_cert_path="/etc/ssl/certs/custom/corp-ca/ca.crt", + ) + + if ( + not hasattr(SseConnectionParams, "model_fields") + or "httpx_client_factory" not in SseConnectionParams.model_fields + ): + pytest.skip("installed google-adk lacks httpx_client_factory on SSE — upgrade to ≥ 1.28.1") + + with mock.patch("kagent.adk.types.create_ssl_context", return_value=object()): + cfg._apply_tls_to_params(cfg.params) + + assert callable(cfg.params.httpx_client_factory) diff --git a/ui/src/types/index.ts b/ui/src/types/index.ts index b4a441ce8e..fb233252ec 100644 --- a/ui/src/types/index.ts +++ b/ui/src/types/index.ts @@ -483,6 +483,7 @@ export interface RemoteMCPServerSpec { timeout?: string; sseReadTimeout?: string; terminateOnClose?: boolean; + tls?: TLSConfig; } export interface RemoteMCPServerResponse { @@ -538,6 +539,14 @@ export interface ToolServerCreateRequest { type: "RemoteMCPServer" | "MCPServer"; remoteMCPServer?: RemoteMCPServer; mcpServer?: MCPServer; + // Optional companion Secrets to create or update alongside the + // ToolServer. Each entry materializes as a key in an Opaque Secret + // owned by the created resource so K8s GC cleans up on delete. + // Names referenced here must match a Secret described in this list + // (e.g. RemoteMCPServer.spec.tls.caCertSecretRef) for inline + // materialization; pre-existing Secrets can also be referenced by + // name without supplying material here. + secrets?: SecretMaterial[]; }