From ff25c8fd8dadf537f869d81ec6f4264f33a1e61d Mon Sep 17 00:00:00 2001 From: Trey Date: Sat, 24 Jan 2026 13:56:25 -0800 Subject: [PATCH 1/7] Initial design doc --- .../THV-00XX-standalone-auth-server-design.md | 1537 +++++++++++++++++ ...HV-00XX-standalone-auth-server-overview.md | 229 +++ 2 files changed, 1766 insertions(+) create mode 100644 rfcs/THV-00XX-standalone-auth-server-design.md create mode 100644 rfcs/THV-00XX-standalone-auth-server-overview.md diff --git a/rfcs/THV-00XX-standalone-auth-server-design.md b/rfcs/THV-00XX-standalone-auth-server-design.md new file mode 100644 index 0000000..593cc67 --- /dev/null +++ b/rfcs/THV-00XX-standalone-auth-server-design.md @@ -0,0 +1,1537 @@ +# Authserver Standalone Kubernetes Deployment Design + +## Overview + +This document describes how `pkg/authserver` could be deployed as a standalone Kubernetes service with mutual TLS (mTLS) authentication between the authserver and proxyrunner components. + +--- + +## Current State + +### Authserver ([pkg/authserver/](pkg/authserver/)) +- Full OAuth 2.0/OIDC authorization server using Fosite +- Discovery endpoints: `/.well-known/openid-configuration`, `/.well-known/oauth-authorization-server`, `/.well-known/jwks.json` +- Authorization flow: `/oauth/authorize` → upstream IDP → `/oauth/callback` → issues own JWT tokens +- JWT sessions with `tsid` claim linking to stored upstream IDP tokens +- Signing key support: RSA/ECDSA/Ed25519 +- In-memory storage only (no persistent backend) +- Key files: + - [server/handlers/handler.go](pkg/authserver/server/handlers/handler.go) - HTTP routing + - [server/handlers/discovery.go](pkg/authserver/server/handlers/discovery.go) - Discovery endpoints + - [server/handlers/authorize.go](pkg/authserver/server/handlers/authorize.go) - Authorization handler + - [server/handlers/callback.go](pkg/authserver/server/handlers/callback.go) - Callback with token exchange + +### Proxyrunner ([cmd/thv-proxyrunner/](cmd/thv-proxyrunner/)) +- Container runner 
wrapper (not an auth gateway) +- Uses middleware chain (auth, tokenexchange, authz, audit) +- Reads `runconfig.json` for configuration +- No mTLS currently - token-based auth only + +### TLS/Certificate Patterns +- [pkg/networking/http_client.go](pkg/networking/http_client.go) - `HttpClientBuilder` with CA bundle support +- No client certificate support for mTLS +- Kubernetes uses ConfigMaps for CA distribution +- No cert-manager integration + +--- + +## Proposed Architecture + +### 1. Kubernetes Deployment Model + +#### New CRD: MCPAuthServer + +Following the [MCPServer CRD pattern](cmd/thv-operator/api/v1alpha1/mcpserver_types.go): + +```yaml +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPAuthServer +metadata: + name: main-authserver + namespace: toolhive-system +spec: + issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" + replicas: 2 + port: 8443 + + upstreamIdp: + type: oidc + oidc: + issuer: "https://accounts.google.com" + clientId: "..." + clientSecretRef: + name: authserver-secrets + key: oidc-client-secret + + signingKey: + secretRef: + name: authserver-signing-key + key: private.pem + algorithm: RS256 + + tls: + # Server certificate for authserver's HTTPS endpoint + certificateRef: + name: mcp-authserver-tls # cert-manager Certificate + + # mTLS: Client CA and validation rules for proxyrunner authentication + clientAuth: + # CA bundle for validating proxyrunner client certificates + caBundle: + configMapRef: + name: toolhive-mtls-ca-bundle + key: ca.crt + + # Allowed client certificate patterns (for access control) + allowedSubjects: + # Allow proxyrunners from specific namespaces + organizationalUnits: + - "toolhive-system" + - "mcp-servers" + - "mcp-production" + # CN pattern: {mcpserver-name}.proxyrunner.{namespace}.toolhive.local + commonNamePattern: "^[a-z0-9-]+\\.proxyrunner\\.[a-z0-9-]+\\.toolhive\\.local$" +``` + +**MCPAuthServer CRD mTLS Types:** + +```go +// AuthServerTLSConfig configures TLS and mTLS for the authserver 
+type AuthServerTLSConfig struct { + // CertificateRef references a cert-manager Certificate for server TLS + // +kubebuilder:validation:Required + CertificateRef CertificateReference `json:"certificateRef"` + + // ClientAuth configures mTLS client certificate validation + // +optional + ClientAuth *ClientAuthConfig `json:"clientAuth,omitempty"` +} + +// ClientAuthConfig configures mTLS client verification for proxyrunners +type ClientAuthConfig struct { + // CABundle references a ConfigMap containing the CA certificate for + // validating proxyrunner client certificates + // +kubebuilder:validation:Required + CABundle CABundleSource `json:"caBundle"` + + // AllowedSubjects restricts which client certificates are accepted + // If not specified, any certificate signed by the CA is accepted + // +optional + AllowedSubjects *AllowedSubjects `json:"allowedSubjects,omitempty"` +} + +// AllowedSubjects defines which certificate subjects are allowed to connect +type AllowedSubjects struct { + // OrganizationalUnits is a list of allowed OU values (typically namespaces) + // Client cert must have at least one matching OU + // +optional + OrganizationalUnits []string `json:"organizationalUnits,omitempty"` + + // CommonNamePattern is a regex pattern for allowed CN values + // +optional + CommonNamePattern string `json:"commonNamePattern,omitempty"` +} +``` + +#### Resources Created by Controller +1. **Deployment** - Authserver pods with mTLS configuration +2. **Service** - ClusterIP for internal access (port 443 → 8443) +3. **ConfigMap** - Runtime configuration +4. 
**ServiceAccount** - Kubernetes RBAC identity + +#### MCPServer CRD Updates for mTLS Client Certificate + +Add a new field to `MCPServerSpec` for configuring the client certificate used when the proxyrunner communicates with the authserver: + +**CRD Addition** ([cmd/thv-operator/api/v1alpha1/mcpserver_types.go](cmd/thv-operator/api/v1alpha1/mcpserver_types.go)): + +```go +// MCPServerSpec defines the desired state of MCPServer +type MCPServerSpec struct { + // ... existing fields ... + + // AuthServerClientConfig configures how the proxyrunner authenticates to the authserver + // +optional + AuthServerClientConfig *AuthServerClientConfig `json:"authServerClientConfig,omitempty"` +} + +// AuthServerClientConfig configures mTLS client authentication to the authserver +type AuthServerClientConfig struct { + // URL is the authserver base URL + // +kubebuilder:validation:Required + URL string `json:"url"` + + // ClientCertificateRef references a cert-manager Certificate for mTLS client auth + // If specified, the controller creates the Certificate and mounts it to the pod + // +optional + ClientCertificateRef *ClientCertificateConfig `json:"clientCertificateRef,omitempty"` + + // CABundleRef references a ConfigMap containing the CA bundle for verifying authserver + // Reuses existing CABundleSource type (defined at mcpserver_types.go:493-499) + // +optional + CABundleRef *CABundleSource `json:"caBundleRef,omitempty"` +} + +// ClientCertificateConfig configures automatic client certificate provisioning +type ClientCertificateConfig struct { + // IssuerRef references the cert-manager Issuer or ClusterIssuer to use + // +kubebuilder:validation:Required + IssuerRef CertManagerIssuerReference `json:"issuerRef"` + + // Duration is the certificate validity period (default: 2160h / 90 days) + // +kubebuilder:default="2160h" + // +optional + Duration string `json:"duration,omitempty"` + + // RenewBefore is when to renew before expiry (default: 360h / 15 days) + // 
+kubebuilder:default="360h" + // +optional + RenewBefore string `json:"renewBefore,omitempty"` +} + +// CertManagerIssuerReference references a cert-manager Issuer or ClusterIssuer +type CertManagerIssuerReference struct { + // Name of the issuer + Name string `json:"name"` + + // Kind is "Issuer" or "ClusterIssuer" + // +kubebuilder:validation:Enum=Issuer;ClusterIssuer + // +kubebuilder:default=ClusterIssuer + Kind string `json:"kind,omitempty"` +} +``` + +**Example MCPServer with mTLS:** + +```yaml +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPServer +metadata: + name: github-tools + namespace: mcp-servers +spec: + image: ghcr.io/example/github-mcp:latest + + # OIDC config points to the authserver (for token validation) + oidcConfig: + type: inline + resourceUrl: "https://github-tools.example.com/" + inline: + issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" + audience: "github-tools" + + # NEW: mTLS client config for proxyrunner → authserver communication + authServerClientConfig: + url: "https://mcp-authserver.toolhive-system.svc.cluster.local" + + # Controller will create a cert-manager Certificate with: + # - CN: github-tools.proxyrunner.mcp-servers.toolhive.local + # - O: ToolHive ProxyRunner + # - OU: mcp-servers + clientCertificateRef: + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + duration: "2160h" # 90 days + renewBefore: "360h" # 15 days + + # CA bundle for verifying authserver's server certificate + caBundleRef: + configMapRef: + name: toolhive-mtls-ca-bundle + key: ca.crt +``` + +**Controller Behavior:** + +When `authServerClientConfig.clientCertificateRef` is specified, the MCPServer controller: + +1. 
**Creates a cert-manager Certificate:** + ```yaml + apiVersion: cert-manager.io/v1 + kind: Certificate + metadata: + name: github-tools-proxyrunner-client + namespace: mcp-servers + spec: + secretName: github-tools-proxyrunner-client-tls + commonName: github-tools.proxyrunner.mcp-servers.toolhive.local + subject: + organizations: ["ToolHive ProxyRunner"] + organizationalUnits: ["mcp-servers"] + usages: ["client auth"] + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + ``` + +2. **Mounts the Secret to the proxyrunner pod:** + ```yaml + volumes: + - name: authserver-client-cert + secret: + secretName: github-tools-proxyrunner-client-tls + volumeMounts: + - name: authserver-client-cert + mountPath: /etc/toolhive/authserver-mtls + readOnly: true + ``` + +3. **Sets environment variables or runconfig:** + ```json + { + "authserver_config": { + "url": "https://mcp-authserver.toolhive-system.svc.cluster.local", + "client_cert_path": "/etc/toolhive/authserver-mtls/tls.crt", + "client_key_path": "/etc/toolhive/authserver-mtls/tls.key", + "ca_bundle_path": "/etc/toolhive/authserver-mtls/ca.crt" + } + } + ``` + +--- + +### 2. OAuth Discovery Flow (RFC 9728) + +The client initially only knows about the MCP server (proxyrunner). Discovery happens via the RFC 9728 Protected Resource Metadata flow: + +``` +┌─────────────┐ ┌─────────────────┐ ┌──────────────────┐ +│ Client │ │ ProxyRunner │ │ AuthServer │ +│ (Agent) │ │ (MCP Server) │ │ │ +└──────┬──────┘ └────────┬────────┘ └────────┬─────────┘ + │ │ │ + │ 1. MCP Request (no auth) │ │ + │───────────────────────────────────►│ │ + │ │ │ + │ 2. 401 Unauthorized │ │ + │ WWW-Authenticate: Bearer │ │ + │ resource_metadata="/.well-known/oauth-protected-resource" │ + │◄───────────────────────────────────│ │ + │ │ │ + │ 3. GET /.well-known/oauth-protected-resource │ + │───────────────────────────────────►│ │ + │ │ │ + │ 4. 
RFC 9728 Protected Resource Metadata │ + │ { "authorization_servers": ["https://mcp-authserver..."], │ + │ "resource": "...", "jwks_uri": "..." } │ + │◄───────────────────────────────────│ │ + │ │ │ + │ 5. GET /.well-known/openid-configuration (to authserver) │ + │─────────────────────────────────────────────────────────────────────────► + │ │ │ + │ 6. OIDC Discovery Document │ + │ { "issuer": "...", "authorization_endpoint": "...", │ + │ "token_endpoint": "...", "jwks_uri": "..." } │ + │◄───────────────────────────────────────────────────────────────────────── + │ │ │ + │ 7. OAuth Authorization Flow (PKCE) │ │ + │─────────────────────────────────────────────────────────────────────────► + │ │ │ + │ 8. Access Token (JWT) │ │ + │◄───────────────────────────────────────────────────────────────────────── + │ │ │ + │ 9. MCP Request + Bearer Token │ │ + │───────────────────────────────────►│ │ + │ │ │ + │ 10. Success │ │ + │◄───────────────────────────────────│ │ +``` + +#### ProxyRunner's Protected Resource Metadata (Step 4) + +The proxyrunner exposes `/.well-known/oauth-protected-resource` via [pkg/auth/token.go:NewAuthInfoHandler()](pkg/auth/token.go#L943): + +```json +{ + "resource": "https://my-mcp-server.example.com/", + "authorization_servers": ["https://mcp-authserver.toolhive-system.svc.cluster.local"], + "bearer_methods_supported": ["header"], + "jwks_uri": "https://mcp-authserver.../.well-known/jwks.json", + "scopes_supported": ["openid", "mcp"] +} +``` + +The `authorization_servers` field tells clients where the authserver is located. 
+ +#### Authserver OIDC Discovery (Step 6) + +```json +{ + "issuer": "https://mcp-authserver.toolhive-system.svc.cluster.local", + "authorization_endpoint": "https://mcp-authserver.../oauth/authorize", + "token_endpoint": "https://mcp-authserver.../oauth/token", + "jwks_uri": "https://mcp-authserver.../.well-known/jwks.json", + "response_types_supported": ["code"], + "grant_types_supported": ["authorization_code", "refresh_token"], + "code_challenge_methods_supported": ["S256"], + "subject_types_supported": ["public"], + "id_token_signing_alg_values_supported": ["RS256"] +} +``` + +#### How ProxyRunner Exposes `/.well-known/oauth-protected-resource` + +**Existing wiring (already implemented):** + +1. **CRD Configuration** ([cmd/thv-operator/api/v1alpha1/mcpserver_types.go:408-432](cmd/thv-operator/api/v1alpha1/mcpserver_types.go#L408-L432)): + ```yaml + apiVersion: toolhive.stacklok.dev/v1alpha1 + kind: MCPServer + spec: + oidcConfig: + type: inline + resourceUrl: "https://my-mcp-server.example.com/" # For protected resource metadata + inline: + issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" + audience: "my-mcp-server" + ``` + +2. **Auth middleware creation** ([pkg/auth/utils.go:63-77](pkg/auth/utils.go#L63-L77)): + ```go + func GetAuthenticationMiddleware(ctx context.Context, oidcConfig *TokenValidatorConfig) (...) { + jwtValidator, err := NewTokenValidator(ctx, *oidcConfig) + // Creates the handler that returns RFC 9728 metadata + authInfoHandler := NewAuthInfoHandler( + oidcConfig.Issuer, // → "authorization_servers" field + jwtValidator.jwksURL, // → "jwks_uri" field + oidcConfig.ResourceURL, // → "resource" field + oidcConfig.Scopes, // → "scopes_supported" field + ) + return jwtValidator.Middleware, authInfoHandler, nil + } + ``` + +3. 
**Handler returns metadata** ([pkg/auth/token.go:943-993](pkg/auth/token.go#L943-L993)): + ```go + func NewAuthInfoHandler(issuer, jwksURL, resourceURL string, scopes []string) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + authInfo := RFC9728AuthInfo{ + Resource: resourceURL, + AuthorizationServers: []string{issuer}, // Points to authserver! + BearerMethodsSupported: []string{"header"}, + JWKSURI: jwksURL, + ScopesSupported: scopes, + } + json.NewEncoder(w).Encode(authInfo) + }) + } + ``` + +4. **Proxy wires up endpoint** ([pkg/transport/proxy/transparent/transparent_proxy.go:450-454](pkg/transport/proxy/transparent/transparent_proxy.go#L450-L454)): + ```go + // In Start() method + if wellKnownHandler := auth.NewWellKnownHandler(p.authInfoHandler); wellKnownHandler != nil { + mux.Handle("/.well-known/", wellKnownHandler) + logger.Info("RFC 9728 OAuth discovery endpoints enabled") + } + ``` + +**What's already working:** +- ProxyRunner exposes `/.well-known/oauth-protected-resource` ✅ +- Returns `authorization_servers` pointing to issuer ✅ +- Clients can discover where to authenticate ✅ + +**What needs to be added for standalone authserver:** +- Currently the `issuer` in OIDC config points to the external IDP (e.g., Google) +- For standalone authserver, the `issuer` would point to the authserver instead +- The authserver then federates to the upstream IDP internally + +--- + +### 3. Mutual Authentication Design + +#### Why mTLS Between Proxyrunner and Authserver? + +The authserver stores **upstream IDP tokens** (access tokens, refresh tokens) and links them to the JWTs it issues via the `tsid` (token session ID) claim. When a proxyrunner receives a client request with a JWT, it may need to: + +1. **Retrieve the upstream access token** to pass to backend MCP servers that require the original IDP token +2. **Exchange tokens** (RFC 8693) to get a backend-specific token +3. 
**Refresh expired upstream tokens** on behalf of the client + +This is sensitive because: +- **Upstream tokens are valuable** - they grant access to external services (Google, GitHub, etc.) +- **Only authorized proxyrunners should access them** - a compromised or rogue service shouldn't be able to request tokens for arbitrary sessions +- **Token binding** - the authserver needs to verify that the proxyrunner requesting tokens is the one the client intended to use + +**mTLS provides strong mutual authentication:** +- Authserver knows which proxyrunner is making the request (certificate identity) +- Proxyrunner can't be impersonated without the private key +- No shared secrets to manage or rotate (cert-manager handles lifecycle) + +#### Certificate Trust Chain + +Each MCPServer gets its own client certificate. This allows the authserver to identify which proxyrunner is making requests (useful for audit logging and access control). + +``` + ┌─────────────────────┐ + │ cert-manager CA │ + │ (ClusterIssuer) │ + │ "toolhive-mtls-ca" │ + └─────────┬───────────┘ + │ signs all certs + │ + ┌────────────────────────┼────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌───────────────┐ ┌───────────────┐ ┌───────────────┐ +│ AuthServer │ │ MCPServer "A" │ │ MCPServer "B" │ +│ Server Cert │ │ Client Cert │ │ Client Cert │ +│ (server auth) │ │ (client auth) │ │ (client auth) │ +│ │ │ │ │ │ +│ CN: mcp-auth │ │ CN: a.proxy │ │ CN: b.proxy │ +│ server... │ │ runner... │ │ runner... 
│ +└───────────────┘ └───────────────┘ └───────────────┘ + │ │ │ + │ │ │ + └──────────────────────┴──────────────────────┘ + mTLS connection + (proxyrunners → authserver) +``` + +**Why per-MCPServer certificates:** +- **Identity**: Authserver knows exactly which MCP server is requesting tokens +- **Audit**: Logs can show "MCPServer 'github-tools' retrieved token for session X" +- **Access control**: Could restrict which proxyrunners can access which sessions +- **Revocation**: Can revoke a single proxyrunner's access without affecting others + +#### How Proxyrunner Authenticates to Authserver + +1. **Client Certificate Identity**: + ``` + CN=myserver.proxyrunner.mcp-servers.toolhive.local + O=ToolHive ProxyRunner + OU=mcp-servers (namespace) + ``` + +2. **mTLS Handshake**: + - Proxyrunner presents client certificate signed by shared CA + - Authserver verifies certificate chain against trusted CA + - Authserver extracts identity from certificate subject + +3. **Identity Binding**: + - Authserver binds proxyrunner identity to token exchange requests + - Issued tokens include proxyrunner identity in claims + +--- + +### 4. 
Cert-Manager Integration + +#### CA Secret (Root of Trust) + +**Option 1: Cert-manager self-signed CA (dev/test)** + +For development and testing, let cert-manager generate a self-signed CA: + +```yaml +# Bootstrap issuer (self-signed) +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: toolhive-selfsigned-bootstrap +spec: + selfSigned: {} +--- +# CA Certificate (generated by bootstrap issuer) +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: toolhive-mtls-ca + namespace: cert-manager +spec: + isCA: true + commonName: "ToolHive mTLS CA" + subject: + organizations: + - ToolHive + organizationalUnits: + - Platform + secretName: toolhive-mtls-ca-keypair + duration: 87600h # 10 years + renewBefore: 8760h # Renew 1 year before expiry + privateKey: + algorithm: RSA + size: 4096 + issuerRef: + name: toolhive-selfsigned-bootstrap + kind: ClusterIssuer +``` + +**Option 2: External CA (production)** + +For production, use an enterprise PKI or cloud-based CA service: + +```yaml +# Example: HashiCorp Vault as CA +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: toolhive-mtls-ca +spec: + vault: + server: https://vault.example.com + path: pki/sign/toolhive-mtls + auth: + kubernetes: + role: cert-manager + mountPath: /v1/auth/kubernetes + secretRef: + name: vault-token + key: token +--- +# Example: AWS Private CA (schematic only: cert-manager has no built-in `acmPCA` issuer type; AWS Private CA requires the external aws-privateca-issuer add-on and its AWSPCAClusterIssuer resource, so apiVersion/kind and every issuerRef must be adjusted accordingly) +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: toolhive-mtls-ca +spec: + acmPCA: + arn: arn:aws:acm-pca:us-east-1:123456789:certificate-authority/abc-123 + region: us-east-1 +``` + +Production considerations: +- Store CA private key in HSM or managed service (Vault, AWS Private CA, Google Cloud CA) +- Use short-lived certificates (90 days) with automatic renewal +- Implement certificate revocation (CRL or OCSP) +- Audit certificate issuance + +#### ClusterIssuer for mTLS + +For Option 1, once the CA secret (`toolhive-mtls-ca-keypair`) exists, create the ClusterIssuer that will sign all mTLS certificates (with Option 2, the external-CA ClusterIssuer defined above already fills this role): + +```yaml +apiVersion: 
cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: toolhive-mtls-ca +spec: + ca: + secretName: toolhive-mtls-ca-keypair +``` + +#### Authserver Server Certificate + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: mcp-authserver-tls + namespace: toolhive-system +spec: + secretName: mcp-authserver-tls + duration: 8760h # 1 year + renewBefore: 720h # 30 days + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + commonName: mcp-authserver.toolhive-system.svc.cluster.local + dnsNames: + - mcp-authserver.toolhive-system.svc.cluster.local + - mcp-authserver.toolhive-system.svc + - mcp-authserver + usages: + - server auth +``` + +#### ProxyRunner Client Certificate (per MCPServer) + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: ${MCPSERVER_NAME}-proxyrunner-client + namespace: ${NAMESPACE} +spec: + secretName: ${MCPSERVER_NAME}-proxyrunner-client-tls + duration: 2160h # 90 days + renewBefore: 360h # 15 days + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + commonName: ${MCPSERVER_NAME}.proxyrunner.${NAMESPACE}.toolhive.local + subject: + organizations: + - "ToolHive ProxyRunner" + organizationalUnits: + - ${NAMESPACE} + usages: + - client auth +``` + +#### HttpClientBuilder Extension + +Extend [pkg/networking/http_client.go](pkg/networking/http_client.go): + +```go +// Add to HttpClientBuilder +func (b *HttpClientBuilder) WithClientCertificate(certPath, keyPath string) *HttpClientBuilder { + b.clientCertPath = certPath + b.clientKeyPath = keyPath + return b +} + +// In Build() method +if b.clientCertPath != "" && b.clientKeyPath != "" { + cert, err := tls.LoadX509KeyPair(b.clientCertPath, b.clientKeyPath) + if err != nil { + return nil, fmt.Errorf("failed to load client certificate: %w", err) + } + transport.TLSClientConfig.Certificates = []tls.Certificate{cert} +} +``` + +--- + +### 5. 
Token Flow + +#### Why Token Exchange is Needed + +When a client authenticates, the authserver: +1. Redirects to upstream IDP (Google, GitHub, etc.) +2. Receives upstream tokens (access token, refresh token, ID token) +3. **Stores these tokens** linked to a session ID (`tsid`) +4. Issues its own JWT containing the `tsid` claim + +The client's JWT **does not contain the upstream tokens** - only a reference to them. When the proxyrunner needs to call a backend that requires the upstream token (e.g., a GitHub MCP server needs a GitHub access token), it must: +1. Extract `tsid` from the client's JWT +2. Call authserver to retrieve the upstream access token +3. Use that token to authenticate to the backend + +#### Complete Flow with Upstream Token Retrieval + +``` +┌─────────────┐ ┌─────────────────┐ ┌──────────────────┐ ┌──────────┐ +│ Client │ │ ProxyRunner │ │ AuthServer │ │ Upstream │ +│ (Agent) │ │ │ │ │ │ IDP │ +└──────┬──────┘ └────────┬────────┘ └────────┬─────────┘ └────┬─────┘ + │ │ │ │ + │ == INITIAL AUTH (via discovery flow above) ======================== + │ │ │ │ + │ │ │ 1. User auth flow │ + │ │ │◄──────────────────►│ + │ │ │ │ + │ │ │ 2. Authserver receives & STORES + │ │ │ upstream tokens (linked to tsid) + │ │ │ │ + │◄────────────────────┼───────────────────────│ │ + │ 3. Authserver JWT { sub, tsid, ... } │ │ + │ (tsid links to stored upstream tokens) │ │ + │ │ │ │ + │ == SUBSEQUENT MCP REQUESTS ========================================= + │ │ │ │ + │ 4. MCP Request │ │ │ + │ + JWT │ │ │ + │────────────────────►│ │ │ + │ │ │ │ + │ │ 5. Validate JWT, │ │ + │ │ extract tsid │ │ + │ │ │ │ + │ │ 6. GET /internal/tokens/{tsid} │ + │ │ (mTLS client cert authenticates │ + │ │ proxyrunner identity) │ + │ │───────────────────────► │ + │ │ │ │ + │ │ 7. 
Authserver verifies: │ + │ │ - mTLS cert is valid proxyrunner │ + │ │ - tsid exists and is not expired │ + │ │ - (optional) proxyrunner is authorized │ + │ │ for this session │ + │ │ │ │ + │ │◄──────────────────────│ │ + │ │ 8. Upstream access token │ + │ │ (the actual IDP token) │ + │ │ │ │ + │ │ 9. Call backend MCP server │ + │ │ with upstream token │ │ + │ │───────────────────────┼───────────────────►│ + │ │ │ │ + │◄────────────────────│ │ │ + │ 10. MCP Response │ │ │ +``` + +#### Proxyrunner Token Exchange Endpoint (new) + +Add to authserver handlers: + +```go +// pkg/authserver/server/handlers/proxyrunner_exchange.go + +// ProxyRunnerIdentity represents the identity extracted from a proxyrunner's mTLS client certificate +type ProxyRunnerIdentity struct { + // Name is the MCPServer name (extracted from CN before ".proxyrunner") + Name string + // Namespace is the Kubernetes namespace (from OU) + Namespace string + // FullCN is the complete Common Name + FullCN string + // CertificateSerial is the certificate serial number (for audit logging) + CertificateSerial string +} + +// extractProxyRunnerIdentity extracts the proxyrunner identity from the mTLS client certificate. +// Returns an error if no client certificate is present or if the certificate doesn't match +// the expected proxyrunner certificate format. 
+func extractProxyRunnerIdentity(r *http.Request) (*ProxyRunnerIdentity, error) { + // Check for TLS connection + if r.TLS == nil { + return nil, errors.New("connection is not TLS") + } + + // Check for peer (client) certificates + if len(r.TLS.PeerCertificates) == 0 { + return nil, errors.New("no client certificate provided") + } + + // Use the leaf certificate (first in chain) + cert := r.TLS.PeerCertificates[0] + + // Extract namespace from OU (Organizational Unit) + // Certificate format: OU=mcp-servers (the namespace) + var namespace string + if len(cert.Subject.OrganizationalUnit) > 0 { + namespace = cert.Subject.OrganizationalUnit[0] + } else { + return nil, errors.New("client certificate missing OU (namespace)") + } + + // Extract MCPServer name from CN + // Certificate format: CN=github-tools.proxyrunner.mcp-servers.toolhive.local + cn := cert.Subject.CommonName + if cn == "" { + return nil, errors.New("client certificate missing CN") + } + + // Parse CN to extract MCPServer name + // Expected format: {mcpserver-name}.proxyrunner.{namespace}.toolhive.local + parts := strings.Split(cn, ".") + if len(parts) < 4 || parts[1] != "proxyrunner" { + return nil, fmt.Errorf("invalid CN format: expected {name}.proxyrunner.{ns}.toolhive.local, got %s", cn) + } + mcpServerName := parts[0] + + // Verify namespace in CN matches OU + cnNamespace := parts[2] + if cnNamespace != namespace { + return nil, fmt.Errorf("CN namespace (%s) doesn't match OU namespace (%s)", cnNamespace, namespace) + } + + return &ProxyRunnerIdentity{ + Name: mcpServerName, + Namespace: namespace, + FullCN: cn, + CertificateSerial: cert.SerialNumber.String(), + }, nil +} + +// Handler struct update (in pkg/authserver/server/handlers/handler.go) +// Add subjectValidator field for mTLS access control +type Handler struct { + fositeProvider fosite.OAuth2Provider + config *server.AuthorizationServerConfig + storage storage.Storage + upstreamIdP upstream.OAuth2Provider + + // NEW: Validator for 
proxyrunner mTLS client certificates + // Initialized from MCPAuthServer CRD's tls.clientAuth.allowedSubjects + subjectValidator *SubjectValidator +} + +// NewHandler initialization (in pkg/authserver/server/handlers/handler.go) +// Updated to accept mTLS configuration +func NewHandler( + fositeProvider fosite.OAuth2Provider, + config *server.AuthorizationServerConfig, + storage storage.Storage, + upstreamIdP upstream.OAuth2Provider, + allowedSubjects *AllowedSubjects, // NEW: from MCPAuthServer CRD +) (*Handler, error) { + // Create subject validator from CRD config + subjectValidator, err := NewSubjectValidator(allowedSubjects) + if err != nil { + return nil, fmt.Errorf("invalid allowedSubjects config: %w", err) + } + + return &Handler{ + fositeProvider: fositeProvider, + config: config, + storage: storage, + upstreamIdP: upstreamIdP, + subjectValidator: subjectValidator, + }, nil +} + +// SubjectValidator validates client certificate subjects against allowed patterns +type SubjectValidator struct { + // allowedOUs is a set of allowed Organizational Unit values (typically namespaces) + allowedOUs map[string]bool + // cnPattern is a compiled regex for validating Common Name format + cnPattern *regexp.Regexp +} + +// NewSubjectValidator creates a validator from MCPAuthServer CRD configuration +func NewSubjectValidator(allowedSubjects *AllowedSubjects) (*SubjectValidator, error) { + if allowedSubjects == nil { + // No restrictions - all valid certificates are allowed + return &SubjectValidator{}, nil + } + + validator := &SubjectValidator{} + + // Build allowed OU set + if len(allowedSubjects.OrganizationalUnits) > 0 { + validator.allowedOUs = make(map[string]bool, len(allowedSubjects.OrganizationalUnits)) + for _, ou := range allowedSubjects.OrganizationalUnits { + validator.allowedOUs[ou] = true + } + } + + // Compile CN pattern + if allowedSubjects.CommonNamePattern != "" { + pattern, err := regexp.Compile(allowedSubjects.CommonNamePattern) + if err != nil { + 
return nil, fmt.Errorf("invalid commonNamePattern: %w", err) + } + validator.cnPattern = pattern + } + + return validator, nil +} + +// validateSubjectAllowed checks if a proxyrunner identity is allowed based on +// the allowedSubjects configuration from the MCPAuthServer CRD. +// +// Returns nil if allowed, error if rejected. +func (v *SubjectValidator) validateSubjectAllowed(identity *ProxyRunnerIdentity) error { + // If no restrictions configured, allow all + if v.allowedOUs == nil && v.cnPattern == nil { + return nil + } + + // Check OU (namespace) restriction + if v.allowedOUs != nil { + if !v.allowedOUs[identity.Namespace] { + return fmt.Errorf("namespace %q is not in allowed list", identity.Namespace) + } + } + + // Check CN pattern restriction + if v.cnPattern != nil { + if !v.cnPattern.MatchString(identity.FullCN) { + return fmt.Errorf("CN %q does not match allowed pattern", identity.FullCN) + } + } + + return nil +} + +// validateSessionAudience verifies that the proxyrunner is authorized to access this session. +// The session's audience (from the JWT) should match the MCPServer identified by the client cert. +// +// This is intended to prevent a compromised proxyrunner in namespace A from requesting tokens for sessions +// that were intended for a different MCPServer in namespace B. NOTE: that guarantee only holds for namespace-qualified audiences ("name.namespace"); the bare-name and URL-hostname matches compare the MCPServer name alone, and tokens without an "aud" claim are accepted without any check. +// +// Audience matching rules: +// 1. If JWT has "aud" claim, it must match the proxyrunner's MCPServer name +// 2. 
The namespace/name combination provides additional binding +func (h *Handler) validateSessionAudience(claims map[string]interface{}, identity *ProxyRunnerIdentity) error { + // Extract audience from JWT claims + // Audience can be a string or array of strings + var audiences []string + switch aud := claims["aud"].(type) { + case string: + audiences = []string{aud} + case []interface{}: + for _, a := range aud { + if s, ok := a.(string); ok { + audiences = append(audiences, s) + } + } + case nil: + // No audience claim - skip validation + // This is acceptable for tokens that don't specify a target MCPServer + return nil + default: + return fmt.Errorf("unexpected audience claim type: %T", aud) + } + + if len(audiences) == 0 { + return nil // No audience restriction + } + + // Build expected audience values for this proxyrunner + // Accept either: + // - Just the MCPServer name: "github-tools" + // - Fully qualified: "github-tools.mcp-servers" (name.namespace) + // - Service URL format: "https://github-tools.mcp-servers.svc.cluster.local" + expectedAudiences := map[string]bool{ + identity.Name: true, + fmt.Sprintf("%s.%s", identity.Name, identity.Namespace): true, + } + + // Check if any JWT audience matches expected + for _, aud := range audiences { + // Direct match + if expectedAudiences[aud] { + return nil + } + // URL-based match: extract hostname and compare + if u, err := url.Parse(aud); err == nil && u.Host != "" { + hostname := strings.Split(u.Host, ".")[0] + if hostname == identity.Name { + return nil + } + } + } + + return fmt.Errorf("token audience %v does not match proxyrunner %s/%s", + audiences, identity.Namespace, identity.Name) +} + +func (h *Handler) ProxyRunnerTokenExchangeHandler(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + // 1. 
Extract proxyrunner identity from mTLS client cert + identity, err := extractProxyRunnerIdentity(r) + if err != nil { + logger.Warnf("mTLS identity extraction failed: %v", err) + h.writeError(w, fosite.ErrAccessDenied.WithHint("mTLS client certificate required")) + return + } + + logger.Infof("Token exchange request from proxyrunner %s/%s (cert serial: %s)", + identity.Namespace, identity.Name, identity.CertificateSerial) + + // 2. Validate proxyrunner is allowed based on allowedSubjects from MCPAuthServer CRD + // h.subjectValidator is initialized from config at startup + if err := h.subjectValidator.validateSubjectAllowed(identity); err != nil { + logger.Warnf("Proxyrunner %s/%s rejected by subject policy: %v", + identity.Namespace, identity.Name, err) + h.writeError(w, fosite.ErrAccessDenied.WithHintf( + "proxyrunner not allowed: %s", err.Error())) + return + } + + // 3. Validate incoming client JWT + clientToken := r.FormValue("subject_token") + if clientToken == "" { + h.writeError(w, fosite.ErrInvalidRequest.WithHint("subject_token required")) + return + } + + claims, err := h.validateClientToken(ctx, clientToken) + if err != nil { + logger.Warnf("Client token validation failed: %v", err) + h.writeError(w, fosite.ErrInvalidGrant.WithHint("invalid subject_token")) + return + } + + // 4. Extract session ID from JWT's tsid claim + tsid, ok := claims["tsid"].(string) + if !ok || tsid == "" { + h.writeError(w, fosite.ErrInvalidGrant.WithHint("token missing tsid claim")) + return + } + + // 5. Retrieve upstream tokens using session ID + upstreamTokens, err := h.storage.GetUpstreamTokens(ctx, tsid) + if err != nil { + logger.Warnf("Failed to retrieve upstream tokens for tsid %s: %v", tsid, err) + h.writeError(w, fosite.ErrInvalidRequest.WithHint("session not found or expired")) + return + } + + // 6. 
Verify proxyrunner is authorized for this specific session + // The session's audience should match the MCPServer making the request + if err := h.validateSessionAudience(claims, identity); err != nil { + logger.Warnf("Session audience mismatch for proxyrunner %s/%s: %v", + identity.Namespace, identity.Name, err) + h.writeError(w, fosite.ErrAccessDenied.WithHintf( + "proxyrunner not authorized for this session: %s", err.Error())) + return + } + + // 7. Return the upstream access token to the proxyrunner + h.writeTokenResponse(w, &TokenExchangeResponse{ + AccessToken: upstreamTokens.AccessToken, + TokenType: "Bearer", + ExpiresIn: int(time.Until(upstreamTokens.ExpiresAt).Seconds()), + // Don't return refresh token - proxyrunner should call this endpoint again + }) +} + +type TokenExchangeResponse struct { + AccessToken string `json:"access_token"` + TokenType string `json:"token_type"` + ExpiresIn int `json:"expires_in,omitempty"` +} +``` + +#### ProxyRunner Client-Side Token Exchange (MCPServer) + +The proxyrunner needs a client to call the authserver's token exchange endpoint with mTLS and caching. 
+ +**New package: `pkg/auth/authserver/` (or `pkg/runner/authserver/`)** + +Location options: +- `pkg/auth/authserver/` - Keeps auth-related code together (recommended) +- `pkg/runner/authserver/` - Closer to where proxyrunner uses it +- `pkg/authserver/client/` - Keeps authserver code together but creates circular dependency risk + +```go +// pkg/auth/authserver/client.go (recommended location) + +// AuthServerClient handles communication with the authserver for token exchange +type AuthServerClient struct { + httpClient *http.Client + baseURL string + tokenCache *TokenCache + mu sync.RWMutex +} + +// Config holds configuration for the authserver client +type Config struct { + // URL is the authserver base URL + URL string + // ClientCertPath is the path to the client certificate for mTLS + ClientCertPath string + // ClientKeyPath is the path to the client key for mTLS + ClientKeyPath string + // CABundlePath is the path to the CA bundle for verifying authserver + CABundlePath string + // CacheTTL is how long to cache tokens (default: 80% of token lifetime) + CacheTTL time.Duration +} + +// NewAuthServerClient creates a new client with mTLS configuration +func NewAuthServerClient(cfg *Config) (*AuthServerClient, error) { + // Build HTTP client with mTLS using existing HttpClientBuilder + clientBuilder := networking.NewHttpClientBuilder() + + // Add CA bundle for server certificate validation + if cfg.CABundlePath != "" { + clientBuilder = clientBuilder.WithCABundle(cfg.CABundlePath) + } + + // Add client certificate for mTLS + if cfg.ClientCertPath != "" && cfg.ClientKeyPath != "" { + clientBuilder = clientBuilder.WithClientCertificate(cfg.ClientCertPath, cfg.ClientKeyPath) + } + + httpClient, err := clientBuilder.Build() + if err != nil { + return nil, fmt.Errorf("failed to create HTTP client: %w", err) + } + + return &AuthServerClient{ + httpClient: httpClient, + baseURL: strings.TrimSuffix(cfg.URL, "/"), + tokenCache: NewTokenCache(cfg.CacheTTL), + }, nil +} + 
+// ExchangeToken exchanges a client JWT for an upstream access token. +// Results are cached to avoid repeated calls for the same session. +func (c *AuthServerClient) ExchangeToken(ctx context.Context, clientJWT string) (*TokenExchangeResult, error) { + // Extract tsid from JWT for cache key (without validating - authserver will validate) + tsid, err := extractTSIDFromJWT(clientJWT) + if err != nil { + return nil, fmt.Errorf("failed to extract tsid from JWT: %w", err) + } + + // Check cache first + if cached := c.tokenCache.Get(tsid); cached != nil { + logger.Debugf("Token cache hit for tsid %s", tsid) + return cached, nil + } + + // Cache miss - call authserver + logger.Debugf("Token cache miss for tsid %s, calling authserver", tsid) + + result, err := c.doTokenExchange(ctx, clientJWT) + if err != nil { + return nil, err + } + + // Cache the result (with TTL based on token expiry) + c.tokenCache.Set(tsid, result) + + return result, nil +} + +// doTokenExchange makes the actual HTTP request to the authserver +func (c *AuthServerClient) doTokenExchange(ctx context.Context, clientJWT string) (*TokenExchangeResult, error) { + // Build request body + form := url.Values{} + form.Set("grant_type", "urn:ietf:params:oauth:grant-type:token-exchange") + form.Set("subject_token", clientJWT) + form.Set("subject_token_type", "urn:ietf:params:oauth:token-type:jwt") + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, + c.baseURL+"/internal/token-exchange", + strings.NewReader(form.Encode())) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("token exchange request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + return nil, fmt.Errorf("token exchange failed with status %d: %s", + 
// extractTSIDFromJWT reads the "tsid" claim out of a compact-serialized JWT.
//
// The signature is NOT checked: the token is only split into its three
// dot-separated segments and the middle (payload) segment is base64url-decoded
// (unpadded) and parsed as JSON. Callers must not treat the returned value as
// trusted.
//
// Returns the tsid, or an error when the token does not have exactly three
// segments, the payload is not valid unpadded base64url, the payload is not
// valid JSON with a string tsid, or the tsid is empty/missing.
func extractTSIDFromJWT(token string) (string, error) {
	// Splitting into at most four pieces is enough to tell "exactly three
	// segments" apart from "too few" or "too many".
	segments := strings.SplitN(token, ".", 4)
	if len(segments) != 3 {
		return "", errors.New("invalid JWT format")
	}

	// The payload is the middle segment.
	rawClaims, err := base64.RawURLEncoding.DecodeString(segments[1])
	if err != nil {
		return "", fmt.Errorf("failed to decode JWT payload: %w", err)
	}

	type sessionClaims struct {
		TSID string `json:"tsid"`
	}
	var parsed sessionClaims
	if err = json.Unmarshal(rawClaims, &parsed); err != nil {
		return "", fmt.Errorf("failed to parse JWT claims: %w", err)
	}

	if tsid := parsed.TSID; tsid != "" {
		return tsid, nil
	}
	return "", errors.New("JWT missing tsid claim")
}
time.Duration) *TokenCache { + if defaultTTL == 0 { + defaultTTL = 5 * time.Minute // Conservative default + } + cache := &TokenCache{ + cache: make(map[string]*cacheEntry), + defaultTTL: defaultTTL, + } + // Start background cleanup goroutine + go cache.cleanupLoop() + return cache +} + +// Get retrieves a cached token, returning nil if not found or expired +func (c *TokenCache) Get(tsid string) *TokenExchangeResult { + c.mu.RLock() + defer c.mu.RUnlock() + + entry, ok := c.cache[tsid] + if !ok { + return nil + } + + // Check if expired + if time.Now().After(entry.expiresAt) { + return nil + } + + return entry.result +} + +// Set stores a token in the cache +// TTL is set to 80% of the token's remaining lifetime, or defaultTTL if no expiry +func (c *TokenCache) Set(tsid string, result *TokenExchangeResult) { + c.mu.Lock() + defer c.mu.Unlock() + + var ttl time.Duration + if !result.ExpiresAt.IsZero() { + // Use 80% of remaining lifetime to refresh before expiry + remaining := time.Until(result.ExpiresAt) + ttl = time.Duration(float64(remaining) * 0.8) + } + if ttl <= 0 { + ttl = c.defaultTTL + } + + c.cache[tsid] = &cacheEntry{ + result: result, + expiresAt: time.Now().Add(ttl), + } +} + +// cleanupLoop periodically removes expired entries +func (c *TokenCache) cleanupLoop() { + ticker := time.NewTicker(1 * time.Minute) + defer ticker.Stop() + + for range ticker.C { + c.cleanup() + } +} + +func (c *TokenCache) cleanup() { + c.mu.Lock() + defer c.mu.Unlock() + + now := time.Now() + for tsid, entry := range c.cache { + if now.After(entry.expiresAt) { + delete(c.cache, tsid) + } + } +} +``` + +**Integration with tokenexchange middleware:** + +Update the existing token exchange middleware to use the authserver client when configured: + +```go +// pkg/auth/tokenexchange/middleware.go (updated) + +// In the middleware function, check for authserver config +func (m *Middleware) ServeHTTP(w http.ResponseWriter, r *http.Request) { + // ... 
existing code to extract identity and subject token ... + + // If authserver client is configured, use it to get upstream token + if m.authServerClient != nil { + result, err := m.authServerClient.ExchangeToken(r.Context(), subjectToken) + if err != nil { + logger.Errorf("Authserver token exchange failed: %v", err) + // Fall back to direct token exchange if configured + if m.directExchangeConfig != nil { + // ... existing direct exchange logic ... + } else { + http.Error(w, "token exchange failed", http.StatusUnauthorized) + return + } + } else { + // Use the upstream token from authserver + r.Header.Set("Authorization", "Bearer "+result.AccessToken) + } + } + + next.ServeHTTP(w, r) +} +``` + +#### RunConfig Extension + +Extend [pkg/runner/config.go](pkg/runner/config.go): + +```go +type RunConfig struct { + // ... existing fields ... + + // AuthServerConfig configures connection to the authorization server + AuthServerConfig *AuthServerClientConfig `json:"authserver_config,omitempty"` +} + +type AuthServerClientConfig struct { + URL string `json:"url"` + ClientCertPath string `json:"client_cert_path,omitempty"` + ClientKeyPath string `json:"client_key_path,omitempty"` + CABundlePath string `json:"ca_bundle_path,omitempty"` +} +``` + +--- + +## Key Files to Modify + +### New Files + +| File | Purpose | +|------|---------| +| `cmd/thv-operator/api/v1alpha1/mcpauthserver_types.go` | MCPAuthServer CRD with mTLS config | +| `cmd/thv-operator/controllers/mcpauthserver_controller.go` | MCPAuthServer reconciler | +| `pkg/authserver/server/handlers/proxyrunner_exchange.go` | Token exchange endpoint for proxyrunners | +| `pkg/authserver/server/handlers/subject_validator.go` | mTLS subject validation (allowedSubjects) | +| `pkg/auth/authserver/client.go` | Client for proxyrunner → authserver mTLS calls | +| `pkg/auth/authserver/cache.go` | Token cache for exchanged tokens | + +### Modified Files + +| File | Changes | +|------|---------| +| 
`cmd/thv-operator/api/v1alpha1/mcpserver_types.go` | Add `AuthServerClientConfig` for mTLS client certs | +| `cmd/thv-operator/controllers/mcpserver_controller.go` | Create cert-manager Certificate for proxyrunner | +| `pkg/networking/http_client.go` | Add `WithClientCertificate()` to HttpClientBuilder | +| `pkg/authserver/server/handlers/handler.go` | Add `subjectValidator` field, update `NewHandler()` | +| `pkg/authserver/storage/types.go` | Add `GetUpstreamTokens(ctx, tsid)` method | +| `pkg/runner/config.go` | Add `AuthServerConfig` struct | +| `pkg/auth/tokenexchange/middleware.go` | Integrate authserver client for token exchange | + +--- + +## Implementation Phases + +### Phase 1: CRD and Operator Foundation + +**MCPAuthServer CRD:** +1. Create `mcpauthserver_types.go` with CRD types: + - `MCPAuthServerSpec` (issuer, replicas, upstreamIdp, signingKey, tls) + - `AuthServerTLSConfig` (certificateRef, clientAuth) + - `ClientAuthConfig` (caBundle, allowedSubjects) + - `AllowedSubjects` (organizationalUnits, commonNamePattern) +2. Run `make generate` and `make manifests` +3. Create `mcpauthserver_controller.go` with basic reconciliation + +**MCPServer CRD Updates:** +4. Add `AuthServerClientConfig` to `MCPServerSpec` +5. Add `ClientCertificateConfig` and `CertManagerIssuerReference` types +6. Run `make generate` and `make manifests` + +### Phase 2: Authserver Server-Side mTLS + +**Handler Updates (`pkg/authserver/server/handlers/`):** +1. Create `subject_validator.go`: + - `SubjectValidator` struct + - `NewSubjectValidator()` function + - `validateSubjectAllowed()` method +2. Update `handler.go`: + - Add `subjectValidator` field to Handler + - Update `NewHandler()` to accept `allowedSubjects` +3. Create `proxyrunner_exchange.go`: + - `ProxyRunnerIdentity` struct + - `extractProxyRunnerIdentity()` function + - `validateSessionAudience()` function + - `ProxyRunnerTokenExchangeHandler()` handler +4. 
Update `handler.go` Routes() to register `/internal/token-exchange` + +**Storage Updates (`pkg/authserver/storage/`):** +5. Add `GetUpstreamTokens(ctx, tsid)` to `UpstreamTokenStorage` interface +6. Implement in `memory.go` (and later Redis) + +### Phase 3: HttpClientBuilder mTLS Support + +**Networking Package (`pkg/networking/`):** +1. Add fields to `HttpClientBuilder`: + - `clientCertPath string` + - `clientKeyPath string` +2. Add `WithClientCertificate(certPath, keyPath)` method +3. Update `Build()` to load and configure client certificate: + ```go + cert, err := tls.LoadX509KeyPair(certPath, keyPath) + transport.TLSClientConfig.Certificates = []tls.Certificate{cert} + ``` +4. Add unit tests for mTLS client configuration + +### Phase 4: ProxyRunner Authserver Client + +**New Package (`pkg/auth/authserver/`):** +1. Create `client.go`: + - `Config` struct + - `AuthServerClient` struct + - `NewAuthServerClient()` constructor + - `ExchangeToken()` with caching + - `doTokenExchange()` HTTP implementation + - `extractTSIDFromJWT()` helper +2. Create `cache.go`: + - `TokenCache` struct + - `NewTokenCache()` constructor + - `Get()`, `Set()`, `cleanup()` methods +3. Add unit tests + +### Phase 5: RunConfig and Middleware Integration + +**RunConfig (`pkg/runner/config.go`):** +1. Add `AuthServerConfig` field to `RunConfig` +2. Add `AuthServerClientConfig` struct + +**Token Exchange Middleware (`pkg/auth/tokenexchange/`):** +3. Add `authServerClient` field to middleware +4. Update middleware factory to create authserver client if configured +5. Update `ServeHTTP` to use authserver client when available +6. Add fallback to direct exchange if authserver fails + +### Phase 6: MCPServer Controller Integration + +**Controller Updates (`cmd/thv-operator/controllers/mcpserver_controller.go`):** +1. Check for `authServerClientConfig` in spec +2. If `clientCertificateRef` specified: + - Create cert-manager `Certificate` resource + - Wait for certificate to be ready +3. 
Mount client cert Secret to proxyrunner pod: + - Add volume from Secret + - Add volumeMount to `/etc/toolhive/authserver-mtls` +4. Configure runconfig with authserver client paths + +### Phase 7: MCPAuthServer Controller + +**Controller (`cmd/thv-operator/controllers/mcpauthserver_controller.go`):** +1. Create Deployment with authserver container +2. Configure server TLS from `certificateRef` +3. Mount client CA bundle for mTLS verification +4. Create ClusterIP Service +5. Create ConfigMap for authserver configuration +6. Handle status conditions and reconciliation + +### Phase 8: Testing and Production Readiness + +**Testing:** + +1. Unit tests for all new packages +2. Integration tests for mTLS handshake +3. E2E test: client → proxyrunner → authserver → upstream token + +**Production Features:** + +4. Redis-backed storage for distributed sessions +5. Metrics for token exchange latency and cache hit rate +6. Audit logging for mTLS identity and token access +7. Certificate rotation handling diff --git a/rfcs/THV-00XX-standalone-auth-server-overview.md b/rfcs/THV-00XX-standalone-auth-server-overview.md new file mode 100644 index 0000000..7711a43 --- /dev/null +++ b/rfcs/THV-00XX-standalone-auth-server-overview.md @@ -0,0 +1,229 @@ +# RFC-XXXX: Title + +- **Status**: Draft | Under Review | Accepted | Rejected | Implemented | Superseded +- **Author(s)**: Your Name (@github-handle) +- **Created**: YYYY-MM-DD +- **Last Updated**: YYYY-MM-DD +- **Target Repository**: toolhive | toolhive-studio | toolhive-registry | toolhive-registry-server | multiple +- **Related Issues**: [toolhive#1234](https://github.com/stacklok/toolhive/issues/1234) (if applicable) + +## Summary + +A brief (2-3 sentence) description of the proposed change. This should be clear enough that someone can understand the essence of the proposal without reading the full document. + +## Problem Statement + +Describe the problem this RFC is trying to solve. 
Include: + +- What is the current behavior or limitation? +- Who is affected by this problem? +- Why is this problem worth solving? + +## Goals + +List the specific goals this RFC aims to achieve: + +- Goal 1 +- Goal 2 +- Goal 3 + +## Non-Goals + +Explicitly state what this RFC does NOT aim to address. This helps set scope and prevents scope creep: + +- Non-goal 1 +- Non-goal 2 + +## Proposed Solution + +Describe your proposed solution in detail. This section should be comprehensive enough for someone to implement the solution. + +### High-Level Design + +Provide an overview of the design. Include diagrams where helpful (Mermaid diagrams are encouraged): + +```mermaid +flowchart LR + A[Component A] --> B[Component B] + B --> C[Component C] +``` + +### Detailed Design + +Break down the implementation details: + +#### Component Changes + +Describe changes to existing components or new components being introduced. + +#### API Changes + +Document any API changes, including: + +- New endpoints or methods +- Modified signatures +- Deprecated functionality + +```go +// Example API change +type NewInterface interface { + Method(ctx context.Context, param string) (Result, error) +} +``` + +#### Configuration Changes + +Document any new configuration options: + +```yaml +# Example configuration +newFeature: + enabled: true + setting: value +``` + +#### Data Model Changes + +Describe any changes to data models, schemas, or storage. + +## Security Considerations + +**This section is required.** Security is a core concern for ToolHive. Address the following: + +### Threat Model + +- What are the potential threats introduced by this change? +- Who are the potential attackers and what are their capabilities? + +### Authentication and Authorization + +- How does this change affect authentication? +- What authorization checks are required? +- Are there any changes to permission models? + +### Data Security + +- What sensitive data does this feature handle? 
+- How is data protected at rest and in transit? +- Are there any data retention or deletion considerations? + +### Input Validation + +- What user input does this feature accept? +- How is input validated and sanitized? +- What are the potential injection vectors? + +### Secrets Management + +- Does this feature require any secrets or credentials? +- How are secrets stored and accessed? +- Are secrets properly rotated and revocable? + +### Audit and Logging + +- What security-relevant events should be logged? +- Are there any compliance requirements? + +### Mitigations + +- What security controls are implemented? +- How do these mitigations address the identified threats? + +## Alternatives Considered + +Describe alternative approaches you considered and why you chose the proposed solution: + +### Alternative 1: [Name] + +- Description +- Pros +- Cons +- Why not chosen + +### Alternative 2: [Name] + +- Description +- Pros +- Cons +- Why not chosen + +## Compatibility + +### Backward Compatibility + +- Is this change backward compatible? +- If not, what is the migration path? +- Are there any deprecation timelines? + +### Forward Compatibility + +- How does this design accommodate future changes? +- Are there extensibility points? + +## Implementation Plan + +Outline the implementation approach: + +### Phase 1: [Description] + +- Task 1 +- Task 2 + +### Phase 2: [Description] + +- Task 3 +- Task 4 + +### Dependencies + +List any dependencies on other work or external factors. + +## Testing Strategy + +Describe how this feature will be tested: + +- Unit tests +- Integration tests +- End-to-end tests +- Performance tests +- Security tests + +## Documentation + +What documentation needs to be created or updated? + +- User documentation +- API documentation +- Architecture documentation +- Runbooks or operational guides + +## Open Questions + +List any unresolved questions that need to be addressed during the review: + +1. Question 1 +2. 
Question 2 + +## References + +- Link to relevant documentation +- Link to related RFCs or proposals +- Link to external specifications or standards + +--- + +## RFC Lifecycle + + + +### Review History + +| Date | Reviewer | Decision | Notes | +|------|----------|----------|-------| +| YYYY-MM-DD | @reviewer | Under Review | Initial submission | + +### Implementation Tracking + +| Repository | PR | Status | +|------------|-----|--------| +| toolhive | #XXXX | Merged | From c8a868c94a6818821699ec37b61a2a1ca3af9bc4 Mon Sep 17 00:00:00 2001 From: Trey Date: Sat, 24 Jan 2026 15:19:44 -0800 Subject: [PATCH 2/7] Update THV-00XX-standalone-auth-server-design.md --- .../THV-00XX-standalone-auth-server-design.md | 320 +++++++++++++----- 1 file changed, 229 insertions(+), 91 deletions(-) diff --git a/rfcs/THV-00XX-standalone-auth-server-design.md b/rfcs/THV-00XX-standalone-auth-server-design.md index 593cc67..2580f08 100644 --- a/rfcs/THV-00XX-standalone-auth-server-design.md +++ b/rfcs/THV-00XX-standalone-auth-server-design.md @@ -39,7 +39,11 @@ This document describes how `pkg/authserver` could be deployed as a standalone K ### 1. Kubernetes Deployment Model -#### New CRD: MCPAuthServer +#### 1.1 MCPAuthServer (New) + +A new CRD and controller for deploying the authserver as a standalone Kubernetes service. 
+ +**CRD Specification:** Following the [MCPServer CRD pattern](cmd/thv-operator/api/v1alpha1/mcpserver_types.go): @@ -70,9 +74,13 @@ spec: algorithm: RS256 tls: - # Server certificate for authserver's HTTPS endpoint - certificateRef: - name: mcp-authserver-tls # cert-manager Certificate + # Issuer for server certificate (controller creates the Certificate resource) + serverCert: + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + duration: "8760h" # 1 year (optional, has default) + renewBefore: "720h" # 30 days (optional, has default) # mTLS: Client CA and validation rules for proxyrunner authentication clientAuth: @@ -98,15 +106,33 @@ spec: ```go // AuthServerTLSConfig configures TLS and mTLS for the authserver type AuthServerTLSConfig struct { - // CertificateRef references a cert-manager Certificate for server TLS + // ServerCert configures automatic server certificate provisioning + // Controller creates a cert-manager Certificate using this issuer // +kubebuilder:validation:Required - CertificateRef CertificateReference `json:"certificateRef"` + ServerCert ServerCertConfig `json:"serverCert"` // ClientAuth configures mTLS client certificate validation // +optional ClientAuth *ClientAuthConfig `json:"clientAuth,omitempty"` } +// ServerCertConfig configures automatic server certificate provisioning +type ServerCertConfig struct { + // IssuerRef references the cert-manager issuer to use + // +kubebuilder:validation:Required + IssuerRef CertManagerIssuerReference `json:"issuerRef"` + + // Duration is the certificate validity period (default: 8760h / 1 year) + // +kubebuilder:default="8760h" + // +optional + Duration string `json:"duration,omitempty"` + + // RenewBefore is when to renew before expiry (default: 720h / 30 days) + // +kubebuilder:default="720h" + // +optional + RenewBefore string `json:"renewBefore,omitempty"` +} + // ClientAuthConfig configures mTLS client verification for proxyrunners type ClientAuthConfig struct { // CABundle references 
a ConfigMap containing the CA certificate for @@ -133,13 +159,60 @@ type AllowedSubjects struct { } ``` -#### Resources Created by Controller -1. **Deployment** - Authserver pods with mTLS configuration -2. **Service** - ClusterIP for internal access (port 443 → 8443) -3. **ConfigMap** - Runtime configuration -4. **ServiceAccount** - Kubernetes RBAC identity +**MCPAuthServer Controller:** -#### MCPServer CRD Updates for mTLS Client Certificate +The controller reconciles MCPAuthServer resources and creates/manages the following Kubernetes resources: + +1. **cert-manager Certificate** - Server certificate for TLS: + ```yaml + # Created from tls.serverCert configuration + apiVersion: cert-manager.io/v1 + kind: Certificate + metadata: + name: ${MCPAuthServer.name}-tls + namespace: ${MCPAuthServer.namespace} + spec: + secretName: ${MCPAuthServer.name}-tls + duration: ${tls.serverCert.duration} # default: 8760h + renewBefore: ${tls.serverCert.renewBefore} # default: 720h + issuerRef: ${tls.serverCert.issuerRef} + commonName: ${MCPAuthServer.name}.${namespace}.svc.cluster.local + dnsNames: + - ${MCPAuthServer.name}.${namespace}.svc.cluster.local + - ${MCPAuthServer.name}.${namespace}.svc + - ${MCPAuthServer.name} + usages: + - server auth + ``` +2. **Deployment** - Authserver pods running `thv-authserver` image: + - Mounts server certificate Secret at `/etc/toolhive/server-tls` + - Mounts client CA ConfigMap at `/etc/toolhive/client-ca` + - Mounts ConfigMap for authserver configuration + ```yaml + volumes: + - name: server-tls + secret: + secretName: ${MCPAuthServer.name}-tls + - name: client-ca + configMap: + name: ${tls.clientAuth.caBundle.configMapRef.name} + volumeMounts: + - name: server-tls + mountPath: /etc/toolhive/server-tls + readOnly: true + - name: client-ca + mountPath: /etc/toolhive/client-ca + readOnly: true + ``` +3. **Service** - ClusterIP for internal access (port 443 → 8443) +4. 
**ConfigMap** - Runtime configuration (issuer, signing key path, client CA path) +5. **ServiceAccount** - Kubernetes RBAC identity + +The controller watches for changes to the MCPAuthServer CR and reconciles the dependent resources. It also monitors the cert-manager Certificate for readiness before creating the Deployment. + +--- + +#### 1.2 MCPServer Updates Add a new field to `MCPServerSpec` for configuring the client certificate used when the proxyrunner communicates with the authserver: @@ -161,15 +234,15 @@ type AuthServerClientConfig struct { // +kubebuilder:validation:Required URL string `json:"url"` - // ClientCertificateRef references a cert-manager Certificate for mTLS client auth + // ClientCert configures automatic client certificate provisioning for mTLS // If specified, the controller creates the Certificate and mounts it to the pod // +optional - ClientCertificateRef *ClientCertificateConfig `json:"clientCertificateRef,omitempty"` + ClientCert *ClientCertificateConfig `json:"clientCert,omitempty"` - // CABundleRef references a ConfigMap containing the CA bundle for verifying authserver + // CABundle references a ConfigMap containing the CA bundle for verifying authserver // Reuses existing CABundleSource type (defined at mcpserver_types.go:493-499) // +optional - CABundleRef *CABundleSource `json:"caBundleRef,omitempty"` + CABundle *CABundleSource `json:"caBundle,omitempty"` } // ClientCertificateConfig configures automatic client certificate provisioning @@ -190,6 +263,8 @@ type ClientCertificateConfig struct { } // CertManagerIssuerReference references a cert-manager Issuer or ClusterIssuer +// NOTE: This type is shared between MCPAuthServer and MCPServer CRDs. 
+// Define in cmd/thv-operator/api/v1alpha1/certmanager_types.go (new file) type CertManagerIssuerReference struct { // Name of the issuer Name string `json:"name"` @@ -212,7 +287,8 @@ metadata: spec: image: ghcr.io/example/github-mcp:latest - # OIDC config points to the authserver (for token validation) + # OIDC config - proxyrunner validates JWTs using authserver's JWKS + # The issuer also appears in /.well-known/oauth-protected-resource for client discovery oidcConfig: type: inline resourceUrl: "https://github-tools.example.com/" @@ -228,7 +304,7 @@ spec: # - CN: github-tools.proxyrunner.mcp-servers.toolhive.local # - O: ToolHive ProxyRunner # - OU: mcp-servers - clientCertificateRef: + clientCert: issuerRef: name: toolhive-mtls-ca kind: ClusterIssuer @@ -236,7 +312,7 @@ spec: renewBefore: "360h" # 15 days # CA bundle for verifying authserver's server certificate - caBundleRef: + caBundle: configMapRef: name: toolhive-mtls-ca-bundle key: ca.crt @@ -244,7 +320,7 @@ spec: **Controller Behavior:** -When `authServerClientConfig.clientCertificateRef` is specified, the MCPServer controller: +When `authServerClientConfig.clientCert` is specified, the MCPServer controller: 1. **Creates a cert-manager Certificate:** ```yaml @@ -612,52 +688,11 @@ spec: #### Authserver Server Certificate -```yaml -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - name: mcp-authserver-tls - namespace: toolhive-system -spec: - secretName: mcp-authserver-tls - duration: 8760h # 1 year - renewBefore: 720h # 30 days - issuerRef: - name: toolhive-mtls-ca - kind: ClusterIssuer - commonName: mcp-authserver.toolhive-system.svc.cluster.local - dnsNames: - - mcp-authserver.toolhive-system.svc.cluster.local - - mcp-authserver.toolhive-system.svc - - mcp-authserver - usages: - - server auth -``` +The authserver's server certificate is **automatically generated by the MCPAuthServer controller** based on the `tls.serverCert` configuration. 
See **Section 1: MCPAuthServer CRD** for the configuration. #### ProxyRunner Client Certificate (per MCPServer) -```yaml -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - name: ${MCPSERVER_NAME}-proxyrunner-client - namespace: ${NAMESPACE} -spec: - secretName: ${MCPSERVER_NAME}-proxyrunner-client-tls - duration: 2160h # 90 days - renewBefore: 360h # 15 days - issuerRef: - name: toolhive-mtls-ca - kind: ClusterIssuer - commonName: ${MCPSERVER_NAME}.proxyrunner.${NAMESPACE}.toolhive.local - subject: - organizations: - - "ToolHive ProxyRunner" - organizationalUnits: - - ${NAMESPACE} - usages: - - client auth -``` +Client certificates for proxyrunners are **automatically generated by the MCPServer controller** when `authServerClientConfig.clientCert` is specified in the MCPServer CRD. See **Section 1: MCPServer CRD Updates** for the configuration and controller behavior. #### HttpClientBuilder Extension @@ -685,7 +720,7 @@ if b.clientCertPath != "" && b.clientKeyPath != "" { ### 5. Token Flow -#### Why Token Exchange is Needed +#### Overview: Why Token Exchange is Needed When a client authenticates, the authserver: 1. Redirects to upstream IDP (Google, GitHub, etc.) @@ -698,6 +733,10 @@ The client's JWT **does not contain the upstream tokens** - only a reference to 2. Call authserver to retrieve the upstream access token 3. Use that token to authenticate to the backend +**Token refresh responsibilities:** +- **Client JWT refresh**: The client is responsible for refreshing its authserver-issued JWT by calling the authserver's `/oauth/token` endpoint with `grant_type=refresh_token`. The proxyrunner returns 401 if the JWT is expired. +- **Upstream IDP token refresh**: The authserver automatically refreshes expired upstream tokens internally when the proxyrunner calls the token exchange endpoint. The proxyrunner never handles upstream refresh directly. 
+ #### Complete Flow with Upstream Token Retrieval ``` @@ -750,12 +789,12 @@ The client's JWT **does not contain the upstream tokens** - only a reference to │ 10. MCP Response │ │ │ ``` -#### Proxyrunner Token Exchange Endpoint (new) +#### 5.1 MCPAuthServer: Token Exchange Endpoint -Add to authserver handlers: +The authserver exposes an internal endpoint for proxyrunners to exchange client JWTs for upstream access tokens. ```go -// pkg/authserver/server/handlers/proxyrunner_exchange.go +// pkg/authserver/server/handlers/token_exchange.go // ProxyRunnerIdentity represents the identity extracted from a proxyrunner's mTLS client certificate type ProxyRunnerIdentity struct { @@ -988,7 +1027,7 @@ func (h *Handler) validateSessionAudience(claims map[string]interface{}, identit audiences, identity.Namespace, identity.Name) } -func (h *Handler) ProxyRunnerTokenExchangeHandler(w http.ResponseWriter, r *http.Request) { +func (h *Handler) TokenExchangeHandler(w http.ResponseWriter, r *http.Request) { ctx := r.Context() // 1. Extract proxyrunner identity from mTLS client cert @@ -1035,9 +1074,17 @@ func (h *Handler) ProxyRunnerTokenExchangeHandler(w http.ResponseWriter, r *http // 5. Retrieve upstream tokens using session ID upstreamTokens, err := h.storage.GetUpstreamTokens(ctx, tsid) - if err != nil { + if errors.Is(err, storage.ErrExpired) { + // 5a. 
Tokens expired - attempt refresh using stored refresh token + upstreamTokens, err = h.refreshUpstreamTokens(ctx, tsid) + if err != nil { + logger.Warnf("Failed to refresh upstream tokens for tsid %s: %v", tsid, err) + h.writeError(w, fosite.ErrServerError.WithHint("upstream token refresh failed")) + return + } + } else if err != nil { logger.Warnf("Failed to retrieve upstream tokens for tsid %s: %v", tsid, err) - h.writeError(w, fosite.ErrInvalidRequest.WithHint("session not found or expired")) + h.writeError(w, fosite.ErrInvalidRequest.WithHint("session not found")) return } @@ -1065,9 +1112,48 @@ type TokenExchangeResponse struct { TokenType string `json:"token_type"` ExpiresIn int `json:"expires_in,omitempty"` } + +// refreshUpstreamTokens refreshes expired upstream IDP tokens. +// Uses the stored refresh token to get new tokens from the upstream IDP. +func (h *Handler) refreshUpstreamTokens(ctx context.Context, tsid string) (*storage.UpstreamTokens, error) { + // Get stored tokens (including refresh token) - bypass expiry check + storedTokens, err := h.storage.GetUpstreamTokensForRefresh(ctx, tsid) + if err != nil { + return nil, fmt.Errorf("failed to get stored tokens: %w", err) + } + + if storedTokens.RefreshToken == "" { + return nil, errors.New("no refresh token available") + } + + // Call upstream IDP to refresh tokens + newTokens, err := h.upstreamIdP.RefreshTokens(ctx, storedTokens.RefreshToken) + if err != nil { + return nil, fmt.Errorf("upstream refresh failed: %w", err) + } + + // Update stored tokens with refreshed values + updatedTokens := &storage.UpstreamTokens{ + ProviderID: storedTokens.ProviderID, + AccessToken: newTokens.AccessToken, + RefreshToken: newTokens.RefreshToken, // May be rotated + IDToken: newTokens.IDToken, + ExpiresAt: newTokens.ExpiresAt, + UserID: storedTokens.UserID, + UpstreamSubject: storedTokens.UpstreamSubject, + ClientID: storedTokens.ClientID, + } + + if err := h.storage.StoreUpstreamTokens(ctx, tsid, updatedTokens); err 
!= nil { + return nil, fmt.Errorf("failed to store refreshed tokens: %w", err) + } + + logger.Infow("upstream tokens refreshed", "tsid", tsid) + return updatedTokens, nil +} ``` -#### ProxyRunner Client-Side Token Exchange (MCPServer) +#### 5.2 MCPServer: AuthServer Client The proxyrunner needs a client to call the authserver's token exchange endpoint with mTLS and caching. @@ -1108,12 +1194,13 @@ func NewAuthServerClient(cfg *Config) (*AuthServerClient, error) { // Build HTTP client with mTLS using existing HttpClientBuilder clientBuilder := networking.NewHttpClientBuilder() - // Add CA bundle for server certificate validation + // Add CA bundle for verifying authserver's server certificate + // This ensures we only connect to an authserver with a cert signed by the trusted CA if cfg.CABundlePath != "" { clientBuilder = clientBuilder.WithCABundle(cfg.CABundlePath) } - // Add client certificate for mTLS + // Add client certificate for mTLS (authserver verifies this) if cfg.ClientCertPath != "" && cfg.ClientKeyPath != "" { clientBuilder = clientBuilder.WithClientCertificate(cfg.ClientCertPath, cfg.ClientKeyPath) } @@ -1141,8 +1228,12 @@ func (c *AuthServerClient) ExchangeToken(ctx context.Context, clientJWT string) // Check cache first if cached := c.tokenCache.Get(tsid); cached != nil { - logger.Debugf("Token cache hit for tsid %s", tsid) - return cached, nil + // Verify the token hasn't expired (with 30s buffer) + if !cached.IsExpired() { + logger.Debugf("Token cache hit for tsid %s", tsid) + return cached, nil + } + logger.Debugf("Token cache hit but token expired for tsid %s", tsid) } // Cache miss - call authserver @@ -1397,9 +1488,10 @@ type AuthServerClientConfig struct { | File | Purpose | |------|---------| +| `cmd/thv-operator/api/v1alpha1/certmanager_types.go` | Shared types (CertManagerIssuerReference) | | `cmd/thv-operator/api/v1alpha1/mcpauthserver_types.go` | MCPAuthServer CRD with mTLS config | | 
`cmd/thv-operator/controllers/mcpauthserver_controller.go` | MCPAuthServer reconciler | -| `pkg/authserver/server/handlers/proxyrunner_exchange.go` | Token exchange endpoint for proxyrunners | +| `pkg/authserver/server/handlers/token_exchange.go` | Token exchange endpoint for proxyrunners | | `pkg/authserver/server/handlers/subject_validator.go` | mTLS subject validation (allowedSubjects) | | `pkg/auth/authserver/client.go` | Client for proxyrunner → authserver mTLS calls | | `pkg/auth/authserver/cache.go` | Token cache for exchanged tokens | @@ -1412,7 +1504,7 @@ type AuthServerClientConfig struct { | `cmd/thv-operator/controllers/mcpserver_controller.go` | Create cert-manager Certificate for proxyrunner | | `pkg/networking/http_client.go` | Add `WithClientCertificate()` to HttpClientBuilder | | `pkg/authserver/server/handlers/handler.go` | Add `subjectValidator` field, update `NewHandler()` | -| `pkg/authserver/storage/types.go` | Add `GetUpstreamTokens(ctx, tsid)` method | +| `pkg/authserver/storage/types.go` | Add `GetUpstreamTokensForRefresh(ctx, tsid)` method (bypasses expiry check) | | `pkg/runner/config.go` | Add `AuthServerConfig` struct | | `pkg/auth/tokenexchange/middleware.go` | Integrate authserver client for token exchange | @@ -1422,19 +1514,24 @@ type AuthServerClientConfig struct { ### Phase 1: CRD and Operator Foundation +**Shared Types:** +1. Create `certmanager_types.go` with shared types: + - `CertManagerIssuerReference` (used by both MCPAuthServer and MCPServer) + **MCPAuthServer CRD:** -1. Create `mcpauthserver_types.go` with CRD types: +2. Create `mcpauthserver_types.go` with CRD types: - `MCPAuthServerSpec` (issuer, replicas, upstreamIdp, signingKey, tls) - - `AuthServerTLSConfig` (certificateRef, clientAuth) + - `AuthServerTLSConfig` (serverCert, clientAuth) + - `ServerCertConfig` (issuerRef, duration, renewBefore) - `ClientAuthConfig` (caBundle, allowedSubjects) - `AllowedSubjects` (organizationalUnits, commonNamePattern) -2. 
Run `make generate` and `make manifests` -3. Create `mcpauthserver_controller.go` with basic reconciliation +3. Run `make generate` and `make manifests` +4. Create `mcpauthserver_controller.go` with basic reconciliation **MCPServer CRD Updates:** -4. Add `AuthServerClientConfig` to `MCPServerSpec` -5. Add `ClientCertificateConfig` and `CertManagerIssuerReference` types -6. Run `make generate` and `make manifests` +5. Add `AuthServerClientConfig` to `MCPServerSpec` +6. Add `ClientCertificateConfig` type (uses shared `CertManagerIssuerReference`) +7. Run `make generate` and `make manifests` ### Phase 2: Authserver Server-Side mTLS @@ -1446,11 +1543,11 @@ type AuthServerClientConfig struct { 2. Update `handler.go`: - Add `subjectValidator` field to Handler - Update `NewHandler()` to accept `allowedSubjects` -3. Create `proxyrunner_exchange.go`: +3. Create `token_exchange.go`: - `ProxyRunnerIdentity` struct - `extractProxyRunnerIdentity()` function - `validateSessionAudience()` function - - `ProxyRunnerTokenExchangeHandler()` handler + - `TokenExchangeHandler()` handler 4. Update `handler.go` Routes() to register `/internal/token-exchange` **Storage Updates (`pkg/authserver/storage/`):** @@ -1503,7 +1600,7 @@ type AuthServerClientConfig struct { **Controller Updates (`cmd/thv-operator/controllers/mcpserver_controller.go`):** 1. Check for `authServerClientConfig` in spec -2. If `clientCertificateRef` specified: +2. If `clientCert` specified: - Create cert-manager `Certificate` resource - Wait for certificate to be ready 3. Mount client cert Secret to proxyrunner pod: @@ -1511,7 +1608,50 @@ type AuthServerClientConfig struct { - Add volumeMount to `/etc/toolhive/authserver-mtls` 4. Configure runconfig with authserver client paths -### Phase 7: MCPAuthServer Controller +### Phase 7: Authserver Service Binary + +**New Entry Point (`cmd/thv-authserver/`):** +1. Create `main.go` with Cobra CLI +2. Create `app/commands.go` with root command +3. 
Create `app/serve.go` with serve command: + ```go + // Loads configuration from: + // - ConfigMap mounted at /etc/authserver/config.yaml + // - Environment variables (AUTHSERVER_*) + // - Command line flags + + // Creates: + // - Upstream IDP provider (OAuth2/OIDC) + // - Storage backend (memory or Redis) + // - Fosite OAuth2 provider + // - Handler with subject validator + // - HTTP server with mTLS + ``` + +**HTTP Server with mTLS (`pkg/authserver/server/`):** +4. Create `server.go`: + - `Server` struct with `net/http.Server` + - `NewServer(config, handler)` constructor + - `Start()` with TLS and mTLS configuration: + ```go + tlsConfig := &tls.Config{ + MinVersion: tls.VersionTLS12, + Certificates: []tls.Certificate{serverCert}, + ClientAuth: tls.RequireAndVerifyClientCert, + ClientCAs: clientCAPool, + VerifyPeerCertificate: subjectValidator.VerifyPeerCertificate, + } + ``` + - `Stop()` graceful shutdown +5. Add health check endpoint at `/healthz` +6. Add readiness endpoint at `/readyz` + +**Container Image:** +7. Add Dockerfile at `cmd/thv-authserver/Dockerfile` +8. Add to `Taskfile.yaml` build targets +9. Push to container registry + +### Phase 8: MCPAuthServer Controller **Controller (`cmd/thv-operator/controllers/mcpauthserver_controller.go`):** 1. Create Deployment with authserver container @@ -1521,16 +1661,14 @@ type AuthServerClientConfig struct { 5. Create ConfigMap for authserver configuration 6. Handle status conditions and reconciliation -### Phase 8: Testing and Production Readiness +### Phase 9: Testing and Production Readiness **Testing:** - 1. Unit tests for all new packages 2. Integration tests for mTLS handshake 3. E2E test: client → proxyrunner → authserver → upstream token **Production Features:** - 4. Redis-backed storage for distributed sessions 5. Metrics for token exchange latency and cache hit rate 6. 
Audit logging for mTLS identity and token access From 8ffefc05a0804ad12be6dffb902c96d46fa6b4bc Mon Sep 17 00:00:00 2001 From: Trey Date: Sun, 25 Jan 2026 07:47:44 -0800 Subject: [PATCH 3/7] Update THV-00XX-standalone-auth-server-design.md --- .../THV-00XX-standalone-auth-server-design.md | 144 +++++++++++++----- 1 file changed, 108 insertions(+), 36 deletions(-) diff --git a/rfcs/THV-00XX-standalone-auth-server-design.md b/rfcs/THV-00XX-standalone-auth-server-design.md index 2580f08..e756f9e 100644 --- a/rfcs/THV-00XX-standalone-auth-server-design.md +++ b/rfcs/THV-00XX-standalone-auth-server-design.md @@ -320,9 +320,9 @@ spec: **Controller Behavior:** -When `authServerClientConfig.clientCert` is specified, the MCPServer controller: +When `authServerClientConfig` is specified, the MCPServer controller: -1. **Creates a cert-manager Certificate:** +1. **Creates a cert-manager Certificate** (if `clientCert` specified): ```yaml apiVersion: cert-manager.io/v1 kind: Certificate @@ -341,16 +341,24 @@ When `authServerClientConfig.clientCert` is specified, the MCPServer controller: kind: ClusterIssuer ``` -2. **Mounts the Secret to the proxyrunner pod:** +2. **Mounts certificates to the proxyrunner pod:** ```yaml volumes: + # Client certificate for mTLS (if clientCert specified) - name: authserver-client-cert secret: secretName: github-tools-proxyrunner-client-tls + # CA bundle for verifying authserver (if caBundle specified) + - name: authserver-ca-bundle + configMap: + name: toolhive-mtls-ca-bundle volumeMounts: - name: authserver-client-cert mountPath: /etc/toolhive/authserver-mtls readOnly: true + - name: authserver-ca-bundle + mountPath: /etc/toolhive/authserver-ca + readOnly: true ``` 3. 
**Sets environment variables or runconfig:** @@ -360,7 +368,7 @@ When `authServerClientConfig.clientCert` is specified, the MCPServer controller: "url": "https://mcp-authserver.toolhive-system.svc.cluster.local", "client_cert_path": "/etc/toolhive/authserver-mtls/tls.crt", "client_key_path": "/etc/toolhive/authserver-mtls/tls.key", - "ca_bundle_path": "/etc/toolhive/authserver-mtls/ca.crt" + "ca_bundle_path": "/etc/toolhive/authserver-ca/ca.crt" } } ``` @@ -519,11 +527,12 @@ The `authorization_servers` field tells clients where the authserver is located. #### Why mTLS Between Proxyrunner and Authserver? -The authserver stores **upstream IDP tokens** (access tokens, refresh tokens) and links them to the JWTs it issues via the `tsid` (token session ID) claim. When a proxyrunner receives a client request with a JWT, it may need to: +The authserver stores **upstream IDP tokens** (access tokens, refresh tokens) and links them to the JWTs it issues via the `tsid` (token session ID) claim. When a proxyrunner receives a client request with an authserver-issued JWT, it may need to: 1. **Retrieve the upstream access token** to pass to backend MCP servers that require the original IDP token 2. **Exchange tokens** (RFC 8693) to get a backend-specific token -3. **Refresh expired upstream tokens** on behalf of the client + +Note: The proxyrunner doesn't refresh client JWTs - clients refresh their authserver-issued JWTs directly via the authserver's `/oauth/token` endpoint with `grant_type=refresh_token`. (This endpoint is currently TODO and needs implementation.) When the proxyrunner calls the token exchange endpoint, the authserver internally refreshes expired upstream IDP tokens if needed. This is sensitive because: - **Upstream tokens are valuable** - they grant access to external services (Google, GitHub, etc.) @@ -571,6 +580,20 @@ Each MCPServer gets its own client certificate. 
This allows the authserver to id - **Access control**: Could restrict which proxyrunners can access which sessions - **Revocation**: Can revoke a single proxyrunner's access without affecting others +#### How Proxyrunner Verifies Authserver Identity + +Before sending sensitive data (client JWTs, session IDs), the proxyrunner must verify it's connecting to a legitimate authserver: + +1. **Server Certificate Verification**: + - Authserver presents its server certificate during TLS handshake + - Proxyrunner validates the certificate chain against the CA bundle (from `authServerClientConfig.caBundle`) + - Certificate must have DNS names matching the authserver URL + +2. **Trust Establishment**: + - Both use the same cert-manager CA (`toolhive-mtls-ca`) + - Proxyrunner's CA bundle contains the CA certificate that signed the authserver's cert + - If verification fails, connection is rejected (prevents MITM attacks) + #### How Proxyrunner Authenticates to Authserver 1. **Client Certificate Identity**: @@ -1267,8 +1290,11 @@ func (c *AuthServerClient) doTokenExchange(ctx context.Context, clientJWT string req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + // TLS handshake verifies authserver's certificate against CA bundle (configured in NewAuthServerClient) + // and presents client certificate for mTLS authentication resp, err := c.httpClient.Do(req) if err != nil { + // Fails if: server cert invalid, client cert rejected, or connection error return nil, fmt.Errorf("token exchange request failed: %w", err) } defer resp.Body.Close() @@ -1427,16 +1453,20 @@ func (c *TokenCache) cleanup() { } ``` +**JWT Verification against MCPAuthServer JWKS:** + +The existing auth middleware in [pkg/auth/token.go](pkg/auth/token.go) validates incoming JWTs against the authserver's JWKS. When `oidcConfig.issuer` points to the MCPAuthServer, the middleware fetches JWKS from `{issuer}/.well-known/jwks.json` and validates JWT signatures, issuer, audience, and expiration. 
+ **Integration with tokenexchange middleware:** -Update the existing token exchange middleware to use the authserver client when configured: +After the auth middleware validates the JWT, the tokenexchange middleware exchanges it for upstream tokens: ```go // pkg/auth/tokenexchange/middleware.go (updated) -// In the middleware function, check for authserver config func (m *Middleware) ServeHTTP(w http.ResponseWriter, r *http.Request) { - // ... existing code to extract identity and subject token ... + // JWT already validated by auth middleware - extract from Authorization header + subjectToken := extractBearerToken(r) // If authserver client is configured, use it to get upstream token if m.authServerClient != nil { @@ -1488,6 +1518,8 @@ type AuthServerClientConfig struct { | File | Purpose | |------|---------| +| `cmd/thv-authserver/main.go` | Authserver service entry point | +| `cmd/thv-authserver/app/serve.go` | Serve command with mTLS HTTP server | | `cmd/thv-operator/api/v1alpha1/certmanager_types.go` | Shared types (CertManagerIssuerReference) | | `cmd/thv-operator/api/v1alpha1/mcpauthserver_types.go` | MCPAuthServer CRD with mTLS config | | `cmd/thv-operator/controllers/mcpauthserver_controller.go` | MCPAuthServer reconciler | @@ -1501,10 +1533,11 @@ type AuthServerClientConfig struct { | File | Changes | |------|---------| | `cmd/thv-operator/api/v1alpha1/mcpserver_types.go` | Add `AuthServerClientConfig` for mTLS client certs | -| `cmd/thv-operator/controllers/mcpserver_controller.go` | Create cert-manager Certificate for proxyrunner | +| `cmd/thv-operator/controllers/mcpserver_controller.go` | Create cert-manager Certificate for proxyrunner, mount CA bundle | | `pkg/networking/http_client.go` | Add `WithClientCertificate()` to HttpClientBuilder | -| `pkg/authserver/server/handlers/handler.go` | Add `subjectValidator` field, update `NewHandler()` | +| `pkg/authserver/server/handlers/handler.go` | Add `subjectValidator` field, update `NewHandler()`, 
register token exchange route | | `pkg/authserver/storage/types.go` | Add `GetUpstreamTokensForRefresh(ctx, tsid)` method (bypasses expiry check) | +| `pkg/authserver/storage/memory.go` | Implement `GetUpstreamTokensForRefresh()` | | `pkg/runner/config.go` | Add `AuthServerConfig` struct | | `pkg/auth/tokenexchange/middleware.go` | Integrate authserver client for token exchange | @@ -1512,13 +1545,15 @@ type AuthServerClientConfig struct { ## Implementation Phases -### Phase 1: CRD and Operator Foundation +### Phase 1: MCPAuthServer CRD and Shared Types **Shared Types:** + 1. Create `certmanager_types.go` with shared types: - `CertManagerIssuerReference` (used by both MCPAuthServer and MCPServer) **MCPAuthServer CRD:** + 2. Create `mcpauthserver_types.go` with CRD types: - `MCPAuthServerSpec` (issuer, replicas, upstreamIdp, signingKey, tls) - `AuthServerTLSConfig` (serverCert, clientAuth) @@ -1526,16 +1561,11 @@ type AuthServerClientConfig struct { - `ClientAuthConfig` (caBundle, allowedSubjects) - `AllowedSubjects` (organizationalUnits, commonNamePattern) 3. Run `make generate` and `make manifests` -4. Create `mcpauthserver_controller.go` with basic reconciliation - -**MCPServer CRD Updates:** -5. Add `AuthServerClientConfig` to `MCPServerSpec` -6. Add `ClientCertificateConfig` type (uses shared `CertManagerIssuerReference`) -7. Run `make generate` and `make manifests` ### Phase 2: Authserver Server-Side mTLS **Handler Updates (`pkg/authserver/server/handlers/`):** + 1. Create `subject_validator.go`: - `SubjectValidator` struct - `NewSubjectValidator()` function @@ -1551,12 +1581,21 @@ type AuthServerClientConfig struct { 4. Update `handler.go` Routes() to register `/internal/token-exchange` **Storage Updates (`pkg/authserver/storage/`):** -5. Add `GetUpstreamTokens(ctx, tsid)` to `UpstreamTokenStorage` interface + +5. Add `GetUpstreamTokensForRefresh(ctx, tsid)` to `UpstreamTokenStorage` interface 6. 
Implement in `memory.go` (and later Redis) +**Unit Tests:** + +7. Test `SubjectValidator` with various OU/CN patterns +8. Test `extractProxyRunnerIdentity()` with valid/invalid certs +9. Test `TokenExchangeHandler()` mTLS validation and token exchange +10. Test `GetUpstreamTokensForRefresh()` in storage + ### Phase 3: HttpClientBuilder mTLS Support **Networking Package (`pkg/networking/`):** + 1. Add fields to `HttpClientBuilder`: - `clientCertPath string` - `clientKeyPath string` @@ -1571,6 +1610,7 @@ type AuthServerClientConfig struct { ### Phase 4: ProxyRunner Authserver Client **New Package (`pkg/auth/authserver/`):** + 1. Create `client.go`: - `Config` struct - `AuthServerClient` struct @@ -1582,35 +1622,58 @@ type AuthServerClientConfig struct { - `TokenCache` struct - `NewTokenCache()` constructor - `Get()`, `Set()`, `cleanup()` methods -3. Add unit tests + +**Unit Tests:** + +3. Test `AuthServerClient` token exchange with mock HTTP server +4. Test `TokenCache` expiry and cleanup +5. Test `extractTSIDFromJWT()` with valid/invalid JWTs ### Phase 5: RunConfig and Middleware Integration **RunConfig (`pkg/runner/config.go`):** + 1. Add `AuthServerConfig` field to `RunConfig` 2. Add `AuthServerClientConfig` struct **Token Exchange Middleware (`pkg/auth/tokenexchange/`):** + 3. Add `authServerClient` field to middleware 4. Update middleware factory to create authserver client if configured 5. Update `ServeHTTP` to use authserver client when available 6. Add fallback to direct exchange if authserver fails -### Phase 6: MCPServer Controller Integration +**Unit Tests:** + +7. Test middleware with mock authserver client +8. Test fallback behavior when authserver fails + +### Phase 6: MCPServer CRD Updates + +**CRD Changes (`cmd/thv-operator/api/v1alpha1/mcpserver_types.go`):** + +1. Add `AuthServerClientConfig` to `MCPServerSpec` +2. Add `ClientCertificateConfig` type (uses shared `CertManagerIssuerReference`) +3. 
Run `make generate` and `make manifests` + +### Phase 7: MCPServer Controller Integration **Controller Updates (`cmd/thv-operator/controllers/mcpserver_controller.go`):** + 1. Check for `authServerClientConfig` in spec 2. If `clientCert` specified: - Create cert-manager `Certificate` resource - Wait for certificate to be ready -3. Mount client cert Secret to proxyrunner pod: - - Add volume from Secret - - Add volumeMount to `/etc/toolhive/authserver-mtls` +3. Mount client cert Secret and CA bundle ConfigMap to proxyrunner pod: + - Add volume from Secret (client cert) + - Add volume from ConfigMap (CA bundle) + - Add volumeMounts to `/etc/toolhive/authserver-mtls` and `/etc/toolhive/authserver-ca` 4. Configure runconfig with authserver client paths -### Phase 7: Authserver Service Binary +### Phase 8: Authserver Service Binary **New Entry Point (`cmd/thv-authserver/`):** + 1. Create `main.go` with Cobra CLI 2. Create `app/commands.go` with root command 3. Create `app/serve.go` with serve command: @@ -1629,6 +1692,7 @@ type AuthServerClientConfig struct { ``` **HTTP Server with mTLS (`pkg/authserver/server/`):** + 4. Create `server.go`: - `Server` struct with `net/http.Server` - `NewServer(config, handler)` constructor @@ -1647,13 +1711,15 @@ type AuthServerClientConfig struct { 6. Add readiness endpoint at `/readyz` **Container Image:** + 7. Add Dockerfile at `cmd/thv-authserver/Dockerfile` 8. Add to `Taskfile.yaml` build targets 9. Push to container registry -### Phase 8: MCPAuthServer Controller +### Phase 9: MCPAuthServer Controller **Controller (`cmd/thv-operator/controllers/mcpauthserver_controller.go`):** + 1. Create Deployment with authserver container 2. Configure server TLS from `certificateRef` 3. Mount client CA bundle for mTLS verification @@ -1661,15 +1727,21 @@ type AuthServerClientConfig struct { 5. Create ConfigMap for authserver configuration 6. 
Handle status conditions and reconciliation -### Phase 9: Testing and Production Readiness +### Phase 10: Integration and E2E Testing + +**Integration Tests:** + +1. mTLS handshake between proxyrunner and authserver +2. Token exchange flow with upstream token refresh + +**E2E Tests:** + +3. Full flow: client → proxyrunner → authserver → upstream token +4. Kubernetes operator tests with cert-manager integration -**Testing:** -1. Unit tests for all new packages -2. Integration tests for mTLS handshake -3. E2E test: client → proxyrunner → authserver → upstream token +**Production Features (future):** -**Production Features:** -4. Redis-backed storage for distributed sessions -5. Metrics for token exchange latency and cache hit rate -6. Audit logging for mTLS identity and token access -7. Certificate rotation handling +5. Redis-backed storage for distributed sessions +6. Metrics for token exchange latency and cache hit rate +7. Audit logging for mTLS identity and token access +8. Certificate rotation handling From 6bc4b82385d75b0747487319cc769c02238f385e Mon Sep 17 00:00:00 2001 From: Trey Date: Sun, 25 Jan 2026 09:37:25 -0800 Subject: [PATCH 4/7] Update THV-00XX-standalone-auth-server-overview.md --- ...HV-00XX-standalone-auth-server-overview.md | 456 +++++++++++++----- 1 file changed, 330 insertions(+), 126 deletions(-) diff --git a/rfcs/THV-00XX-standalone-auth-server-overview.md b/rfcs/THV-00XX-standalone-auth-server-overview.md index 7711a43..4f33679 100644 --- a/rfcs/THV-00XX-standalone-auth-server-overview.md +++ b/rfcs/THV-00XX-standalone-auth-server-overview.md @@ -1,214 +1,418 @@ -# RFC-XXXX: Title +# RFC-XXXX: Standalone Auth Server Kubernetes Deployment -- **Status**: Draft | Under Review | Accepted | Rejected | Implemented | Superseded -- **Author(s)**: Your Name (@github-handle) -- **Created**: YYYY-MM-DD -- **Last Updated**: YYYY-MM-DD -- **Target Repository**: toolhive | toolhive-studio | toolhive-registry | toolhive-registry-server | multiple -- 
**Related Issues**: [toolhive#1234](https://github.com/stacklok/toolhive/issues/1234) (if applicable) +- **Status**: Draft +- **Author(s)**: Trey Grunnagle +- **Created**: 2026-01-24 +- **Last Updated**: 2026-01-25 +- **Target Repository**: toolhive +- **Related Issues**: [#195](https://github.com/stacklok/stacklok-epics/issues/195) ## Summary -A brief (2-3 sentence) description of the proposed change. This should be clear enough that someone can understand the essence of the proposal without reading the full document. +This RFC proposes deploying `pkg/authserver` as a standalone Kubernetes service with mutual TLS (mTLS) authentication between the authserver and proxyrunner components. The design introduces a new `MCPAuthServer` CRD, extends the `MCPServer` CRD for mTLS client configuration, and implements a secure token exchange flow where proxyrunners can retrieve upstream IDP tokens from the authserver. ## Problem Statement -Describe the problem this RFC is trying to solve. Include: +The ToolHive authserver (`pkg/authserver/`) is a complete OAuth2/OIDC authorization server implementation built on Fosite, but it currently exists as an **unintegrated library**. While the core functionality is implemented and tested (authorization endpoints, token issuance, upstream IDP federation, PKCE support, in-memory storage), there is: -- What is the current behavior or limitation? -- Who is affected by this problem? -- Why is this problem worth solving? 
+- **No deployment model**: No `cmd/thv-authserver/` entry point, no Kubernetes manifests, no CRD +- **No integration with proxyrunner**: The authserver cannot be used by MCP servers for authentication +- **No operator support**: No way to deploy or manage authserver instances via the ToolHive operator +- **No secure communication channel**: No mTLS or other mechanism for proxyrunners to securely retrieve upstream tokens -## Goals +Who is affected: +- Platform operators who need centralized authentication for MCP servers in Kubernetes +- MCP server developers who need OAuth2/OIDC authentication for their tools +- Security teams requiring secure token exchange between components -List the specific goals this RFC aims to achieve: +Why this is worth solving: +- Enables MCP servers to authenticate users via upstream IDPs (Google, GitHub, etc.) +- Provides a centralized authentication service that multiple MCP servers can share +- Establishes secure mTLS communication for sensitive token exchange operations +- Leverages the existing, tested authserver implementation rather than building new auth infrastructure -- Goal 1 -- Goal 2 -- Goal 3 +## Goals -## Non-Goals +- Deploy authserver as a standalone Kubernetes service via a new `MCPAuthServer` CRD +- Implement mTLS authentication between proxyrunners and the authserver +- Enable secure token exchange allowing proxyrunners to retrieve upstream IDP tokens +- Integrate with cert-manager for automated certificate lifecycle management +- Support RFC 9728 OAuth Protected Resource Metadata discovery flow +- Maintain backward compatibility with existing deployments -Explicitly state what this RFC does NOT aim to address. 
This helps set scope and prevents scope creep: +## Non-Goals -- Non-goal 1 -- Non-goal 2 +- Implementing a distributed/persistent storage backend (Redis) - this is future work +- Multi-cluster federation or cross-cluster authentication +- Certificate revocation lists (CRL) or OCSP - mentioned as future production feature +- Custom identity providers beyond OIDC-compliant IDPs +- Direct client-to-authserver communication for token refresh (clients handle this separately) ## Proposed Solution -Describe your proposed solution in detail. This section should be comprehensive enough for someone to implement the solution. - ### High-Level Design -Provide an overview of the design. Include diagrams where helpful (Mermaid diagrams are encouraged): +The solution introduces a standalone authserver deployment with mTLS securing communication between proxyrunners and the authserver. ```mermaid -flowchart LR - A[Component A] --> B[Component B] - B --> C[Component C] +flowchart TB + subgraph Clients + A[MCP Client/Agent] + end + + subgraph Kubernetes Cluster + subgraph toolhive-system + AS[MCPAuthServer
mTLS Server] + CA[cert-manager CA
ClusterIssuer] + end + + subgraph mcp-servers namespace + PR1[ProxyRunner A
mTLS Client] + PR2[ProxyRunner B
mTLS Client] + end + end + + subgraph External + IDP[Upstream IDP
Google/GitHub/etc] + end + + A -->|1. OAuth Discovery| PR1 + A -->|2. Auth Flow| AS + AS -->|3. Federate| IDP + A -->|4. JWT + MCP Request| PR1 + PR1 -->|5. mTLS Token Exchange| AS + + CA -.->|Signs| AS + CA -.->|Signs| PR1 + CA -.->|Signs| PR2 ``` ### Detailed Design -Break down the implementation details: +Refer to [THV-00XX-standalone-auth-server-design.md](./THV-00XX-standalone-auth-server-design.md) for code snippets and additional details. + +#### New MCPAuthServer CRD + +A new Custom Resource Definition for deploying the authserver as a standalone service: + +```yaml +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPAuthServer +metadata: + name: main-authserver + namespace: toolhive-system +spec: + issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" + replicas: 2 + port: 8443 + + upstreamIdp: + type: oidc + oidc: + issuer: "https://accounts.google.com" + clientId: "..." + clientSecretRef: + name: authserver-secrets + key: oidc-client-secret + + signingKey: + secretRef: + name: authserver-signing-key + key: private.pem + algorithm: RS256 + + tls: + serverCert: + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + duration: "8760h" + renewBefore: "720h" + + clientAuth: + caBundle: + configMapRef: + name: toolhive-mtls-ca-bundle + key: ca.crt + allowedSubjects: + organizationalUnits: + - "toolhive-system" + - "mcp-servers" + commonNamePattern: "^[a-z0-9-]+\\.proxyrunner\\.[a-z0-9-]+\\.toolhive\\.local$" +``` + +The MCPAuthServer controller creates: +1. **cert-manager Certificate** for server TLS +2. **Deployment** running the authserver image +3. **Service** (ClusterIP) for internal access +4. **ConfigMap** for runtime configuration +5. 
**ServiceAccount** for RBAC + +#### MCPServer CRD Updates + +Add `authServerClientConfig` to configure mTLS client certificates for proxyrunners: + +```yaml +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPServer +metadata: + name: github-tools + namespace: mcp-servers +spec: + image: ghcr.io/example/github-mcp:latest + + oidcConfig: + type: inline + resourceUrl: "https://github-tools.example.com/" + inline: + issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" + audience: "github-tools" + + authServerClientConfig: + url: "https://mcp-authserver.toolhive-system.svc.cluster.local" + clientCert: + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + duration: "2160h" + renewBefore: "360h" + caBundle: + configMapRef: + name: toolhive-mtls-ca-bundle + key: ca.crt +``` + +#### OAuth Discovery Flow (RFC 9728) + +Clients discover the authserver through the proxyrunner's protected resource metadata: + +1. Client sends unauthenticated MCP request +2. ProxyRunner returns 401 with `WWW-Authenticate: Bearer resource_metadata="/.well-known/oauth-protected-resource"` +3. Client fetches `/.well-known/oauth-protected-resource` from proxyrunner +4. Response includes `authorization_servers` pointing to the authserver +5. Client fetches OIDC discovery from authserver +6. Client completes OAuth flow with authserver +7. Client uses issued JWT for subsequent MCP requests + +#### Token Exchange Flow + +When a proxyrunner needs upstream IDP tokens (e.g., to call a GitHub API): + +1. Proxyrunner validates client JWT, extracts `tsid` (token session ID) +2. Proxyrunner calls authserver's `/internal/token-exchange` endpoint via mTLS +3. Authserver verifies proxyrunner's client certificate +4. Authserver validates the client JWT and retrieves stored upstream tokens +5. Authserver returns upstream access token to proxyrunner +6. 
Proxyrunner uses upstream token to call backend services #### Component Changes -Describe changes to existing components or new components being introduced. +| Component | Changes | +|-----------|---------| +| `pkg/networking/http_client.go` | Add `WithClientCertificate()` method to `HttpClientBuilder` | +| `pkg/authserver/server/handlers/` | Add token exchange endpoint, subject validator, mTLS identity extraction | +| `pkg/authserver/storage/` | Add `GetUpstreamTokensForRefresh()` method | +| `pkg/auth/authserver/` | New package for proxyrunner authserver client with caching | +| `pkg/runner/config.go` | Add `AuthServerConfig` struct | +| `cmd/thv-authserver/` | New service binary entry point | +| `cmd/thv-operator/api/v1alpha1/` | New CRD types for MCPAuthServer and shared cert-manager types | +| `cmd/thv-operator/controllers/` | New MCPAuthServer controller, updates to MCPServer controller | #### API Changes -Document any API changes, including: +**New Token Exchange Endpoint:** + +``` +POST /internal/token-exchange +Content-Type: application/x-www-form-urlencoded + +grant_type=urn:ietf:params:oauth:grant-type:token-exchange +subject_token={client_jwt} +subject_token_type=urn:ietf:params:oauth:token-type:jwt + +Response: +{ + "access_token": "{upstream_access_token}", + "token_type": "Bearer", + "expires_in": 3600 +} +``` -- New endpoints or methods -- Modified signatures -- Deprecated functionality +**HttpClientBuilder Extension:** ```go -// Example API change -type NewInterface interface { - Method(ctx context.Context, param string) (Result, error) -} +func (b *HttpClientBuilder) WithClientCertificate(certPath, keyPath string) *HttpClientBuilder ``` #### Configuration Changes -Document any new configuration options: +**RunConfig Extension:** -```yaml -# Example configuration -newFeature: - enabled: true - setting: value +```json +{ + "authserver_config": { + "url": "https://mcp-authserver.toolhive-system.svc.cluster.local", + "client_cert_path": 
"/etc/toolhive/authserver-mtls/tls.crt", + "client_key_path": "/etc/toolhive/authserver-mtls/tls.key", + "ca_bundle_path": "/etc/toolhive/authserver-ca/ca.crt" + } +} ``` -#### Data Model Changes - -Describe any changes to data models, schemas, or storage. - ## Security Considerations -**This section is required.** Security is a core concern for ToolHive. Address the following: - ### Threat Model -- What are the potential threats introduced by this change? -- Who are the potential attackers and what are their capabilities? +| Threat | Description | Mitigation | +|--------|-------------|------------| +| Rogue Service | Unauthorized service attempts to retrieve upstream tokens | mTLS client certificate verification with `allowedSubjects` | +| Token Theft | Attacker intercepts tokens in transit | All communication uses TLS; mTLS for sensitive token exchange | +| Session Hijacking | Compromised proxyrunner requests tokens for other sessions | Audience validation binds sessions to specific MCPServers | +| Certificate Impersonation | Attacker presents forged certificate | Certificates signed only by trusted cert-manager CA | ### Authentication and Authorization -- How does this change affect authentication? -- What authorization checks are required? -- Are there any changes to permission models? +- **Proxyrunner → Authserver**: mTLS with per-MCPServer client certificates +- **Client → Authserver**: OAuth 2.0 with PKCE +- **Certificate Identity**: CN includes MCPServer name and namespace; OU contains namespace for access control +- **Subject Validation**: Configurable `allowedSubjects` restricts which namespaces/certificates can connect ### Data Security -- What sensitive data does this feature handle? -- How is data protected at rest and in transit? -- Are there any data retention or deletion considerations? - -### Input Validation - -- What user input does this feature accept? -- How is input validated and sanitized? -- What are the potential injection vectors? 
+- **Upstream tokens**: Stored in authserver memory, linked to session ID (`tsid`) +- **Transit protection**: All internal communication uses mTLS +- **Token caching**: Proxyrunner caches exchanged tokens with TTL based on token expiry +- **No credential exposure**: Upstream refresh tokens never leave the authserver ### Secrets Management -- Does this feature require any secrets or credentials? -- How are secrets stored and accessed? -- Are secrets properly rotated and revocable? +| Secret | Storage | Rotation | +|--------|---------|----------| +| Server TLS certificate | Kubernetes Secret (managed by cert-manager) | Automatic via cert-manager | +| Client TLS certificates | Kubernetes Secret (managed by cert-manager) | Automatic via cert-manager | +| Upstream IDP client secret | Kubernetes Secret (referenced by CRD) | Manual rotation | +| JWT signing key | Kubernetes Secret (referenced by CRD) | Manual rotation | ### Audit and Logging -- What security-relevant events should be logged? -- Are there any compliance requirements? - -### Mitigations - -- What security controls are implemented? -- How do these mitigations address the identified threats? 
+- Token exchange requests logged with proxyrunner identity (namespace, name, cert serial) +- Access denials logged with rejection reason +- Certificate validation failures logged ## Alternatives Considered -Describe alternative approaches you considered and why you chose the proposed solution: +### Alternative 1: Service Mesh mTLS (Istio/Linkerd) + +- **Description**: Use service mesh for mTLS instead of application-level certificates +- **Pros**: Automatic certificate management, no application changes +- **Cons**: Requires service mesh deployment, additional operational complexity, less fine-grained control over certificate subjects +- **Why not chosen**: Not all deployments use service mesh; application-level mTLS provides more control over identity binding -### Alternative 1: [Name] +### Alternative 2: Shared Secret Authentication -- Description -- Pros -- Cons -- Why not chosen +- **Description**: Use pre-shared secrets/API keys for proxyrunner authentication +- **Pros**: Simpler implementation, no PKI required +- **Cons**: Secrets must be manually rotated, harder to audit individual proxyrunners, no cryptographic binding +- **Why not chosen**: mTLS provides stronger security guarantees and automatic rotation via cert-manager -### Alternative 2: [Name] +### Alternative 3: JWT-Based Service Authentication -- Description -- Pros -- Cons -- Why not chosen +- **Description**: Proxyrunners use JWTs (possibly Kubernetes ServiceAccount tokens) to authenticate to authserver +- **Pros**: No additional certificates needed +- **Cons**: ServiceAccount tokens are cluster-scoped, less control over identity claims +- **Why not chosen**: mTLS provides mutual authentication and better integration with the existing certificate infrastructure ## Compatibility ### Backward Compatibility -- Is this change backward compatible? -- If not, what is the migration path? -- Are there any deprecation timelines? 
+- Existing MCPServer deployments without `authServerClientConfig` continue to work unchanged +- The `authServerClientConfig` field is optional +- Existing proxyrunner OIDC validation continues to work (can point directly to upstream IDP or to authserver) ### Forward Compatibility -- How does this design accommodate future changes? -- Are there extensibility points? +- CRD design accommodates future storage backends (Redis) via abstraction +- Certificate configuration extensible to support additional issuer types +- Token exchange endpoint follows RFC 8693 patterns for future scope expansion ## Implementation Plan -Outline the implementation approach: - -### Phase 1: [Description] - -- Task 1 -- Task 2 - -### Phase 2: [Description] - -- Task 3 -- Task 4 - -### Dependencies - -List any dependencies on other work or external factors. +### Phase 1: MCPAuthServer CRD and Shared Types +- Create shared `CertManagerIssuerReference` type +- Create MCPAuthServer CRD types +- Run code generation + +### Phase 2: Authserver Server-Side mTLS +- Implement `SubjectValidator` for certificate validation +- Implement `extractProxyRunnerIdentity()` for mTLS identity extraction +- Implement `TokenExchangeHandler` endpoint +- Add storage methods for token refresh + +### Phase 3: HttpClientBuilder mTLS Support +- Add `WithClientCertificate()` method +- Update `Build()` to configure client certificates + +### Phase 4: ProxyRunner Authserver Client +- Create `AuthServerClient` with mTLS configuration +- Implement `TokenCache` for caching exchanged tokens +- Implement token exchange logic + +### Phase 5: RunConfig and Middleware Integration +- Extend `RunConfig` with authserver configuration +- Update token exchange middleware to use authserver client + +### Phase 6: MCPServer CRD Updates +- Add `AuthServerClientConfig` to MCPServerSpec +- Add `ClientCertificateConfig` type + +### Phase 7: MCPServer Controller Integration +- Create cert-manager Certificate for proxyrunner client certs +- 
Mount certificates and CA bundle to pods +- Configure runconfig with certificate paths + +### Phase 8: Authserver Service Binary +- Create `cmd/thv-authserver/` entry point +- Implement HTTP server with mTLS +- Add health/readiness endpoints +- Create Dockerfile and build configuration + +### Phase 9: MCPAuthServer Controller +- Implement controller reconciliation logic +- Create Deployment, Service, ConfigMap resources +- Handle certificate readiness + +### Phase 10: Integration and E2E Testing +- mTLS handshake tests +- Token exchange flow tests +- Full end-to-end tests with cert-manager ## Testing Strategy -Describe how this feature will be tested: - -- Unit tests -- Integration tests -- End-to-end tests -- Performance tests -- Security tests +- **Unit tests**: SubjectValidator, identity extraction, token cache, HTTP client builder +- **Integration tests**: mTLS handshake, token exchange with mock authserver, certificate validation +- **End-to-end tests**: Full OAuth flow through proxyrunner to authserver, Kubernetes operator with cert-manager +- **Security tests**: Invalid certificates rejected, unauthorized namespaces denied, audience validation enforced ## Documentation -What documentation needs to be created or updated? - -- User documentation -- API documentation -- Architecture documentation -- Runbooks or operational guides +- User guide for deploying MCPAuthServer +- Configuration reference for MCPAuthServer and MCPServer CRDs +- Cert-manager integration guide (ClusterIssuer setup) +- Architecture documentation for token exchange flow +- Troubleshooting guide for mTLS issues ## Open Questions -List any unresolved questions that need to be addressed during the review: - -1. Question 1 -2. Question 2 +1. Should the authserver support multiple upstream IDPs per instance, or one IDP per MCPAuthServer? 
## References -- Link to relevant documentation -- Link to related RFCs or proposals -- Link to external specifications or standards +- [RFC 9728: OAuth 2.0 Protected Resource Metadata](https://datatracker.ietf.org/doc/html/rfc9728) +- [RFC 8693: OAuth 2.0 Token Exchange](https://datatracker.ietf.org/doc/html/rfc8693) +- [cert-manager Documentation](https://cert-manager.io/docs/) +- [Fosite OAuth2 Framework](https://github.com/ory/fosite) +- [THV-00XX-standalone-auth-server-design.md](THV-00XX-standalone-auth-server-design.md) - Detailed design document --- @@ -220,10 +424,10 @@ List any unresolved questions that need to be addressed during the review: | Date | Reviewer | Decision | Notes | |------|----------|----------|-------| -| YYYY-MM-DD | @reviewer | Under Review | Initial submission | +| 2026-01-24 | - | Draft | Initial submission | ### Implementation Tracking | Repository | PR | Status | |------------|-----|--------| -| toolhive | #XXXX | Merged | +| toolhive | - | Pending | From cf433e56f3929c1552eaf227be750ae7844359cd Mon Sep 17 00:00:00 2001 From: Trey Date: Sun, 25 Jan 2026 09:41:10 -0800 Subject: [PATCH 5/7] Rename to 0028 --- ...er-design.md => THV-0028-standalone-auth-server-design.md} | 0 ...verview.md => THV-0028-standalone-auth-server-overview.md} | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename rfcs/{THV-00XX-standalone-auth-server-design.md => THV-0028-standalone-auth-server-design.md} (100%) rename rfcs/{THV-00XX-standalone-auth-server-overview.md => THV-0028-standalone-auth-server-overview.md} (99%) diff --git a/rfcs/THV-00XX-standalone-auth-server-design.md b/rfcs/THV-0028-standalone-auth-server-design.md similarity index 100% rename from rfcs/THV-00XX-standalone-auth-server-design.md rename to rfcs/THV-0028-standalone-auth-server-design.md diff --git a/rfcs/THV-00XX-standalone-auth-server-overview.md b/rfcs/THV-0028-standalone-auth-server-overview.md similarity index 99% rename from
rfcs/THV-00XX-standalone-auth-server-overview.md rename to rfcs/THV-0028-standalone-auth-server-overview.md index 4f33679..9948369 100644 --- a/rfcs/THV-00XX-standalone-auth-server-overview.md +++ b/rfcs/THV-0028-standalone-auth-server-overview.md @@ -1,4 +1,4 @@ -# RFC-XXXX: Standalone Auth Server Kubernetes Deployment +# RFC-0028: Standalone Auth Server Kubernetes Deployment - **Status**: Draft - **Author(s)**: Trey Grunnagle @@ -89,7 +89,7 @@ flowchart TB ### Detailed Design -Refer to [THV-00XX-standalone-auth-server-design.md](./THV-00XX-standalone-auth-server-design.md) for code snippets and additional details. +Refer to [THV-0028-standalone-auth-server-design.md](./THV-0028-standalone-auth-server-design.md) for code snippets and additional details. #### New MCPAuthServer CRD From 0821f2db233881b598c6476ad9ed76b6faf241c5 Mon Sep 17 00:00:00 2001 From: Trey Date: Mon, 26 Jan 2026 09:45:57 -0800 Subject: [PATCH 6/7] Address some feedback - move `AuthServerConfig` from `MCPServer` to `MCPExternalAuthConfig` - use SPIFFE urls for allowed clients --- .../THV-0028-standalone-auth-server-design.md | 391 +++++++++++------- ...HV-0028-standalone-auth-server-overview.md | 132 +++--- 2 files changed, 287 insertions(+), 236 deletions(-) diff --git a/rfcs/THV-0028-standalone-auth-server-design.md b/rfcs/THV-0028-standalone-auth-server-design.md index e756f9e..b193151 100644 --- a/rfcs/THV-0028-standalone-auth-server-design.md +++ b/rfcs/THV-0028-standalone-auth-server-design.md @@ -91,14 +91,15 @@ spec: key: ca.crt # Allowed client certificate patterns (for access control) + # Uses SPIFFE URI format: spiffe://{trust.domain}/ns/{namespace}/mcpserver/{name} allowedSubjects: + # Trust domain for SPIFFE URIs (required) + trustDomain: "toolhive.local" # Allow proxyrunners from specific namespaces - organizationalUnits: + allowedNamespaces: - "toolhive-system" - "mcp-servers" - "mcp-production" - # CN pattern: {mcpserver-name}.proxyrunner.{namespace}.toolhive.local - 
commonNamePattern: "^[a-z0-9-]+\\.proxyrunner\\.[a-z0-9-]+\\.toolhive\\.local$" ``` **MCPAuthServer CRD mTLS Types:** @@ -147,15 +148,18 @@ type ClientAuthConfig struct { } // AllowedSubjects defines which certificate subjects are allowed to connect +// Uses SPIFFE URI format: spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name} type AllowedSubjects struct { - // OrganizationalUnits is a list of allowed OU values (typically namespaces) - // Client cert must have at least one matching OU - // +optional - OrganizationalUnits []string `json:"organizationalUnits,omitempty"` + // TrustDomain is the SPIFFE trust domain for validating client certificate URIs + // Example: "toolhive.local" + // +kubebuilder:validation:Required + TrustDomain string `json:"trustDomain"` - // CommonNamePattern is a regex pattern for allowed CN values + // AllowedNamespaces is a list of Kubernetes namespaces whose MCPServers are allowed + // The SPIFFE URI must match: spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name} + // If empty, all namespaces are allowed (only trustDomain is validated) // +optional - CommonNamePattern string `json:"commonNamePattern,omitempty"` + AllowedNamespaces []string `json:"allowedNamespaces,omitempty"` } ``` @@ -212,35 +216,49 @@ The controller watches for changes to the MCPAuthServer CR and reconciles the de --- -#### 1.2 MCPServer Updates +#### 1.2 MCPExternalAuthConfig Updates (authServer type) -Add a new field to `MCPServerSpec` for configuring the client certificate used when the proxyrunner communicates with the authserver: +Add a new external auth type `authServer` to `MCPExternalAuthConfig` for configuring the proxyrunner's mTLS client authentication to the standalone authserver. This leverages the existing `MCPExternalAuthConfig` pattern and allows the same authserver configuration to be shared across multiple MCPServer resources. 
-**CRD Addition** ([cmd/thv-operator/api/v1alpha1/mcpserver_types.go](cmd/thv-operator/api/v1alpha1/mcpserver_types.go)): +**CRD Addition** ([cmd/thv-operator/api/v1alpha1/mcpexternalauthconfig_types.go](cmd/thv-operator/api/v1alpha1/mcpexternalauthconfig_types.go)): ```go -// MCPServerSpec defines the desired state of MCPServer -type MCPServerSpec struct { +// Add new auth type constant +const ( + // ExternalAuthTypeAuthServer is the type for standalone authserver mTLS authentication + // Used when proxyrunner needs to exchange client JWTs for upstream tokens via the authserver + ExternalAuthTypeAuthServer ExternalAuthType = "authServer" +) + +// MCPExternalAuthConfigSpec defines the desired state of MCPExternalAuthConfig. +type MCPExternalAuthConfigSpec struct { + // Type is the type of external authentication to configure + // +kubebuilder:validation:Enum=tokenExchange;headerInjection;bearerToken;unauthenticated;authServer + // +kubebuilder:validation:Required + Type ExternalAuthType `json:"type"` + // ... existing fields ... 
- // AuthServerClientConfig configures how the proxyrunner authenticates to the authserver + // AuthServer configures mTLS client authentication to a standalone authserver + // Only used when Type is "authServer" // +optional - AuthServerClientConfig *AuthServerClientConfig `json:"authServerClientConfig,omitempty"` + AuthServer *AuthServerConfig `json:"authServer,omitempty"` } -// AuthServerClientConfig configures mTLS client authentication to the authserver -type AuthServerClientConfig struct { +// AuthServerConfig configures mTLS client authentication to the standalone authserver +type AuthServerConfig struct { // URL is the authserver base URL // +kubebuilder:validation:Required URL string `json:"url"` // ClientCert configures automatic client certificate provisioning for mTLS - // If specified, the controller creates the Certificate and mounts it to the pod + // If specified, the MCPExternalAuthConfig controller creates the Certificate + // and the MCPServer controller mounts it to the proxyrunner pod // +optional ClientCert *ClientCertificateConfig `json:"clientCert,omitempty"` // CABundle references a ConfigMap containing the CA bundle for verifying authserver - // Reuses existing CABundleSource type (defined at mcpserver_types.go:493-499) + // Reuses existing CABundleSource type // +optional CABundle *CABundleSource `json:"caBundle,omitempty"` } @@ -251,19 +269,17 @@ type ClientCertificateConfig struct { // +kubebuilder:validation:Required IssuerRef CertManagerIssuerReference `json:"issuerRef"` - // Duration is the certificate validity period (default: 2160h / 90 days) - // +kubebuilder:default="2160h" - // +optional - Duration string `json:"duration,omitempty"` + // TrustDomain is the SPIFFE trust domain for the client certificate URI SAN + // The certificate will include: spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name} + // +kubebuilder:validation:Required + TrustDomain string `json:"trustDomain"` - // RenewBefore is when to renew before expiry 
(default: 360h / 15 days) - // +kubebuilder:default="360h" - // +optional - RenewBefore string `json:"renewBefore,omitempty"` + // Note: duration and renewBefore use reasonable defaults (90 days / 15 days) + // and are not exposed in the CRD to keep the API simple } // CertManagerIssuerReference references a cert-manager Issuer or ClusterIssuer -// NOTE: This type is shared between MCPAuthServer and MCPServer CRDs. +// NOTE: This type is shared between MCPAuthServer and MCPExternalAuthConfig CRDs. // Define in cmd/thv-operator/api/v1alpha1/certmanager_types.go (new file) type CertManagerIssuerReference struct { // Name of the issuer @@ -276,40 +292,30 @@ type CertManagerIssuerReference struct { } ``` -**Example MCPServer with mTLS:** +**Example MCPExternalAuthConfig for authServer:** ```yaml apiVersion: toolhive.stacklok.dev/v1alpha1 -kind: MCPServer +kind: MCPExternalAuthConfig metadata: - name: github-tools + name: main-authserver-client namespace: mcp-servers spec: - image: ghcr.io/example/github-mcp:latest - - # OIDC config - proxyrunner validates JWTs using authserver's JWKS - # The issuer also appears in /.well-known/oauth-protected-resource for client discovery - oidcConfig: - type: inline - resourceUrl: "https://github-tools.example.com/" - inline: - issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" - audience: "github-tools" + type: authServer - # NEW: mTLS client config for proxyrunner → authserver communication - authServerClientConfig: + authServer: url: "https://mcp-authserver.toolhive-system.svc.cluster.local" # Controller will create a cert-manager Certificate with: - # - CN: github-tools.proxyrunner.mcp-servers.toolhive.local - # - O: ToolHive ProxyRunner - # - OU: mcp-servers + # - CN: {mcpserver-name} (human-readable for audit logs) + # - URI SAN: spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name} + # The certificate uses reasonable defaults: 90 days validity, renew at 15 days before expiry clientCert: + # Trust domain for 
SPIFFE URIs in client certificates + trustDomain: "toolhive.local" issuerRef: name: toolhive-mtls-ca kind: ClusterIssuer - duration: "2160h" # 90 days - renewBefore: "360h" # 15 days # CA bundle for verifying authserver's server certificate caBundle: @@ -318,36 +324,63 @@ spec: key: ca.crt ``` +**Example MCPServer referencing the authServer config:** + +```yaml +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPServer +metadata: + name: github-tools + namespace: mcp-servers +spec: + image: ghcr.io/example/github-mcp:latest + + # OIDC config - proxyrunner validates JWTs using authserver's JWKS + # The issuer also appears in /.well-known/oauth-protected-resource for client discovery + oidcConfig: + type: inline + resourceUrl: "https://github-tools.example.com/" + inline: + issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" + audience: "github-tools" + + # Reference the MCPExternalAuthConfig for authserver mTLS + externalAuthConfigRef: + name: main-authserver-client +``` + **Controller Behavior:** -When `authServerClientConfig` is specified, the MCPServer controller: +When an `MCPExternalAuthConfig` with `type: authServer` is referenced by an MCPServer: -1. **Creates a cert-manager Certificate** (if `clientCert` specified): +1. 
**MCPExternalAuthConfig Controller creates a cert-manager Certificate** (if `clientCert` specified): ```yaml apiVersion: cert-manager.io/v1 kind: Certificate metadata: - name: github-tools-proxyrunner-client + name: github-tools-mtls namespace: mcp-servers spec: - secretName: github-tools-proxyrunner-client-tls - commonName: github-tools.proxyrunner.mcp-servers.toolhive.local - subject: - organizations: ["ToolHive ProxyRunner"] - organizationalUnits: ["mcp-servers"] - usages: ["client auth"] + secretName: github-tools-mtls + duration: 2160h # 90 days (default) + renewBefore: 360h # 15 days (default) + commonName: github-tools # Human-readable for audit logs + uris: + - "spiffe://toolhive.local/ns/mcp-servers/mcpserver/github-tools" + usages: + - client auth issuerRef: name: toolhive-mtls-ca kind: ClusterIssuer ``` -2. **Mounts certificates to the proxyrunner pod:** +2. **MCPServer Controller mounts certificates to the proxyrunner pod:** ```yaml volumes: # Client certificate for mTLS (if clientCert specified) - name: authserver-client-cert secret: - secretName: github-tools-proxyrunner-client-tls + secretName: github-tools-mtls # CA bundle for verifying authserver (if caBundle specified) - name: authserver-ca-bundle configMap: @@ -361,7 +394,7 @@ When `authServerClientConfig` is specified, the MCPServer controller: readOnly: true ``` -3. **Sets environment variables or runconfig:** +3. **MCPServer Controller sets runconfig:** ```json { "authserver_config": { @@ -564,8 +597,9 @@ Each MCPServer gets its own client certificate. This allows the authserver to id │ Server Cert │ │ Client Cert │ │ Client Cert │ │ (server auth) │ │ (client auth) │ │ (client auth) │ │ │ │ │ │ │ -│ CN: mcp-auth │ │ CN: a.proxy │ │ CN: b.proxy │ -│ server... │ │ runner... │ │ runner... │ +│ CN: mcp-auth │ │ CN: server-a │ │ CN: server-b │ +│ server... 
│ │ URI: spiffe://│ │ URI: spiffe://│ +│ │ │ .../server-a │ │ .../server-b │ └───────────────┘ └───────────────┘ └───────────────┘ │ │ │ │ │ │ @@ -596,17 +630,21 @@ Before sending sensitive data (client JWTs, session IDs), the proxyrunner must v #### How Proxyrunner Authenticates to Authserver -1. **Client Certificate Identity**: +1. **Client Certificate Identity (SPIFFE URI)**: ``` - CN=myserver.proxyrunner.mcp-servers.toolhive.local - O=ToolHive ProxyRunner - OU=mcp-servers (namespace) + CN=github-tools # Human-readable for audit logs + URI SAN=spiffe://toolhive.local/ns/mcp-servers/mcpserver/github-tools ``` + The SPIFFE URI encodes: + - **Trust domain**: `toolhive.local` (organization-specific) + - **Namespace**: `mcp-servers` (Kubernetes namespace) + - **MCPServer name**: `github-tools` + 2. **mTLS Handshake**: - Proxyrunner presents client certificate signed by shared CA - Authserver verifies certificate chain against trusted CA - - Authserver extracts identity from certificate subject + - Authserver extracts identity from SPIFFE URI SAN 3. 
**Identity Binding**: - Authserver binds proxyrunner identity to token exchange requests @@ -819,21 +857,32 @@ The authserver exposes an internal endpoint for proxyrunners to exchange client ```go // pkg/authserver/server/handlers/token_exchange.go +import ( + "github.com/spiffe/go-spiffe/v2/spiffeid" +) + // ProxyRunnerIdentity represents the identity extracted from a proxyrunner's mTLS client certificate type ProxyRunnerIdentity struct { - // Name is the MCPServer name (extracted from CN before ".proxyrunner") - Name string - // Namespace is the Kubernetes namespace (from OU) + // SpiffeID is the parsed SPIFFE ID from the certificate URI SAN + SpiffeID spiffeid.ID + // TrustDomain is the SPIFFE trust domain (e.g., "toolhive.local") + TrustDomain spiffeid.TrustDomain + // Namespace is the Kubernetes namespace (from SPIFFE URI path) Namespace string - // FullCN is the complete Common Name - FullCN string + // Name is the MCPServer name (from SPIFFE URI path) + Name string + // CommonName is the human-readable CN (for audit logs) + CommonName string // CertificateSerial is the certificate serial number (for audit logging) CertificateSerial string } // extractProxyRunnerIdentity extracts the proxyrunner identity from the mTLS client certificate. -// Returns an error if no client certificate is present or if the certificate doesn't match -// the expected proxyrunner certificate format. +// The certificate must contain a SPIFFE URI SAN in the format: +// spiffe://{trust.domain}/ns/{namespace}/mcpserver/{name} +// +// Returns an error if no client certificate is present or if the certificate doesn't have +// a valid SPIFFE URI SAN. 
func extractProxyRunnerIdentity(r *http.Request) (*ProxyRunnerIdentity, error) { // Check for TLS connection if r.TLS == nil { @@ -848,40 +897,41 @@ func extractProxyRunnerIdentity(r *http.Request) (*ProxyRunnerIdentity, error) { // Use the leaf certificate (first in chain) cert := r.TLS.PeerCertificates[0] - // Extract namespace from OU (Organizational Unit) - // Certificate format: OU=mcp-servers (the namespace) - var namespace string - if len(cert.Subject.OrganizationalUnit) > 0 { - namespace = cert.Subject.OrganizationalUnit[0] - } else { - return nil, errors.New("client certificate missing OU (namespace)") + // Find and parse SPIFFE ID from certificate URI SANs + var spiffeID spiffeid.ID + var found bool + for _, uri := range cert.URIs { + if uri.Scheme == "spiffe" { + var err error + spiffeID, err = spiffeid.FromURI(uri) + if err != nil { + return nil, fmt.Errorf("invalid SPIFFE URI in certificate: %w", err) + } + found = true + break + } } - // Extract MCPServer name from CN - // Certificate format: CN=github-tools.proxyrunner.mcp-servers.toolhive.local - cn := cert.Subject.CommonName - if cn == "" { - return nil, errors.New("client certificate missing CN") + if !found { + return nil, errors.New("client certificate missing SPIFFE URI SAN") } - // Parse CN to extract MCPServer name - // Expected format: {mcpserver-name}.proxyrunner.{namespace}.toolhive.local - parts := strings.Split(cn, ".") - if len(parts) < 4 || parts[1] != "proxyrunner" { - return nil, fmt.Errorf("invalid CN format: expected {name}.proxyrunner.{ns}.toolhive.local, got %s", cn) + // Parse path segments: /ns/{namespace}/mcpserver/{name} + // spiffeid.ID.Path() returns the path with its leading slash; trim it before splitting + pathSegments := strings.Split(strings.TrimPrefix(spiffeID.Path(), "/"), "/") + if len(pathSegments) != 4 || pathSegments[0] != "ns" || pathSegments[2] != "mcpserver" { + return nil, fmt.Errorf("invalid SPIFFE ID path format: expected /ns/{namespace}/mcpserver/{name}, got %s", spiffeID.Path()) } - mcpServerName :=
parts[0] - // Verify namespace in CN matches OU - cnNamespace := parts[2] - if cnNamespace != namespace { - return nil, fmt.Errorf("CN namespace (%s) doesn't match OU namespace (%s)", cnNamespace, namespace) - } + namespace := pathSegments[1] + mcpServerName := pathSegments[3] return &ProxyRunnerIdentity{ - Name: mcpServerName, + SpiffeID: spiffeID, + TrustDomain: spiffeID.TrustDomain(), Namespace: namespace, - FullCN: cn, + Name: mcpServerName, + CommonName: cert.Subject.CommonName, // For audit logging CertificateSerial: cert.SerialNumber.String(), }, nil } @@ -923,12 +973,12 @@ func NewHandler( }, nil } -// SubjectValidator validates client certificate subjects against allowed patterns +// SubjectValidator validates client certificate SPIFFE IDs against allowed patterns type SubjectValidator struct { - // allowedOUs is a set of allowed Organizational Unit values (typically namespaces) - allowedOUs map[string]bool - // cnPattern is a compiled regex for validating Common Name format - cnPattern *regexp.Regexp + // trustDomain is the required SPIFFE trust domain + trustDomain spiffeid.TrustDomain + // allowedNamespaces is a set of allowed Kubernetes namespaces (nil means all allowed) + allowedNamespaces map[string]bool } // NewSubjectValidator creates a validator from MCPAuthServer CRD configuration @@ -938,23 +988,22 @@ func NewSubjectValidator(allowedSubjects *AllowedSubjects) (*SubjectValidator, e return &SubjectValidator{}, nil } - validator := &SubjectValidator{} + // Parse and validate trust domain + td, err := spiffeid.TrustDomainFromString(allowedSubjects.TrustDomain) + if err != nil { + return nil, fmt.Errorf("invalid trustDomain %q: %w", allowedSubjects.TrustDomain, err) + } - // Build allowed OU set - if len(allowedSubjects.OrganizationalUnits) > 0 { - validator.allowedOUs = make(map[string]bool, len(allowedSubjects.OrganizationalUnits)) - for _, ou := range allowedSubjects.OrganizationalUnits { - validator.allowedOUs[ou] = true - } + validator := 
&SubjectValidator{ + trustDomain: td, } - // Compile CN pattern - if allowedSubjects.CommonNamePattern != "" { - pattern, err := regexp.Compile(allowedSubjects.CommonNamePattern) - if err != nil { - return nil, fmt.Errorf("invalid commonNamePattern: %w", err) + // Build allowed namespaces set + if len(allowedSubjects.AllowedNamespaces) > 0 { + validator.allowedNamespaces = make(map[string]bool, len(allowedSubjects.AllowedNamespaces)) + for _, ns := range allowedSubjects.AllowedNamespaces { + validator.allowedNamespaces[ns] = true } - validator.cnPattern = pattern } return validator, nil @@ -963,24 +1012,27 @@ func NewSubjectValidator(allowedSubjects *AllowedSubjects) (*SubjectValidator, e // validateSubjectAllowed checks if a proxyrunner identity is allowed based on // the allowedSubjects configuration from the MCPAuthServer CRD. // +// Validates: +// 1. SPIFFE trust domain matches the configured trust domain +// 2. Namespace is in the allowed list (if configured) +// // Returns nil if allowed, error if rejected. 
func (v *SubjectValidator) validateSubjectAllowed(identity *ProxyRunnerIdentity) error { - // If no restrictions configured, allow all - if v.allowedOUs == nil && v.cnPattern == nil { + // If no trust domain configured, allow all + if v.trustDomain.IsZero() { return nil } - // Check OU (namespace) restriction - if v.allowedOUs != nil { - if !v.allowedOUs[identity.Namespace] { - return fmt.Errorf("namespace %q is not in allowed list", identity.Namespace) - } + // Check trust domain matches + if !identity.TrustDomain.Equal(v.trustDomain) { + return fmt.Errorf("trust domain %q does not match required %q", + identity.TrustDomain, v.trustDomain) } - // Check CN pattern restriction - if v.cnPattern != nil { - if !v.cnPattern.MatchString(identity.FullCN) { - return fmt.Errorf("CN %q does not match allowed pattern", identity.FullCN) + // Check namespace restriction (if configured) + if v.allowedNamespaces != nil { + if !v.allowedNamespaces[identity.Namespace] { + return fmt.Errorf("namespace %q is not in allowed list", identity.Namespace) } } @@ -988,14 +1040,14 @@ func (v *SubjectValidator) validateSubjectAllowed(identity *ProxyRunnerIdentity) } // validateSessionAudience verifies that the proxyrunner is authorized to access this session. -// The session's audience (from the JWT) should match the MCPServer identified by the client cert. +// The session's audience (from the JWT) should match the MCPServer identified by the SPIFFE ID. // // This prevents a compromised proxyrunner in namespace A from requesting tokens for sessions // that were intended for a different MCPServer in namespace B. // // Audience matching rules: // 1. If JWT has "aud" claim, it must match the proxyrunner's MCPServer name -// 2. The namespace/name combination provides additional binding +// 2. 
The SPIFFE ID namespace/name combination provides additional binding func (h *Handler) validateSessionAudience(claims map[string]interface{}, identity *ProxyRunnerIdentity) error { // Extract audience from JWT claims // Audience can be a string or array of strings @@ -1025,10 +1077,12 @@ func (h *Handler) validateSessionAudience(claims map[string]interface{}, identit // Accept either: // - Just the MCPServer name: "github-tools" // - Fully qualified: "github-tools.mcp-servers" (name.namespace) + // - SPIFFE ID: "spiffe://toolhive.local/ns/mcp-servers/mcpserver/github-tools" // - Service URL format: "https://github-tools.mcp-servers.svc.cluster.local" expectedAudiences := map[string]bool{ identity.Name: true, fmt.Sprintf("%s.%s", identity.Name, identity.Namespace): true, + identity.SpiffeID.String(): true, } // Check if any JWT audience matches expected @@ -1046,29 +1100,29 @@ func (h *Handler) validateSessionAudience(claims map[string]interface{}, identit } } - return fmt.Errorf("token audience %v does not match proxyrunner %s/%s", - audiences, identity.Namespace, identity.Name) + return fmt.Errorf("token audience %v does not match proxyrunner SPIFFE ID %s", + audiences, identity.SpiffeID) } func (h *Handler) TokenExchangeHandler(w http.ResponseWriter, r *http.Request) { ctx := r.Context() - // 1. Extract proxyrunner identity from mTLS client cert + // 1. 
Extract proxyrunner identity from mTLS client cert SPIFFE ID identity, err := extractProxyRunnerIdentity(r) if err != nil { - logger.Warnf("mTLS identity extraction failed: %v", err) - h.writeError(w, fosite.ErrAccessDenied.WithHint("mTLS client certificate required")) + logger.Warnf("SPIFFE identity extraction failed: %v", err) + h.writeError(w, fosite.ErrAccessDenied.WithHint("mTLS client certificate with SPIFFE URI required")) return } - logger.Infof("Token exchange request from proxyrunner %s/%s (cert serial: %s)", - identity.Namespace, identity.Name, identity.CertificateSerial) + logger.Infof("Token exchange request from proxyrunner %s (cert serial: %s)", + identity.SpiffeID, identity.CertificateSerial) - // 2. Validate proxyrunner is allowed based on allowedSubjects from MCPAuthServer CRD + // 2. Validate proxyrunner SPIFFE ID is allowed based on allowedSubjects from MCPAuthServer CRD // h.subjectValidator is initialized from config at startup if err := h.subjectValidator.validateSubjectAllowed(identity); err != nil { - logger.Warnf("Proxyrunner %s/%s rejected by subject policy: %v", - identity.Namespace, identity.Name, err) + logger.Warnf("Proxyrunner %s rejected by subject policy: %v", + identity.SpiffeID, err) h.writeError(w, fosite.ErrAccessDenied.WithHintf( "proxyrunner not allowed: %s", err.Error())) return @@ -1112,10 +1166,10 @@ func (h *Handler) TokenExchangeHandler(w http.ResponseWriter, r *http.Request) { } // 6. 
Verify proxyrunner is authorized for this specific session - // The session's audience should match the MCPServer making the request + // The session's audience should match the MCPServer identified by the SPIFFE ID if err := h.validateSessionAudience(claims, identity); err != nil { - logger.Warnf("Session audience mismatch for proxyrunner %s/%s: %v", - identity.Namespace, identity.Name, err) + logger.Warnf("Session audience mismatch for proxyrunner %s: %v", + identity.SpiffeID, err) h.writeError(w, fosite.ErrAccessDenied.WithHintf( "proxyrunner not authorized for this session: %s", err.Error())) return @@ -1524,16 +1578,23 @@ type AuthServerClientConfig struct { | `cmd/thv-operator/api/v1alpha1/mcpauthserver_types.go` | MCPAuthServer CRD with mTLS config | | `cmd/thv-operator/controllers/mcpauthserver_controller.go` | MCPAuthServer reconciler | | `pkg/authserver/server/handlers/token_exchange.go` | Token exchange endpoint for proxyrunners | -| `pkg/authserver/server/handlers/subject_validator.go` | mTLS subject validation (allowedSubjects) | +| `pkg/authserver/server/handlers/subject_validator.go` | SPIFFE ID validation (allowedSubjects) | | `pkg/auth/authserver/client.go` | Client for proxyrunner → authserver mTLS calls | | `pkg/auth/authserver/cache.go` | Token cache for exchanged tokens | +### New Dependencies + +| Package | Purpose | +|---------|---------| +| `github.com/spiffe/go-spiffe/v2/spiffeid` | SPIFFE ID parsing and validation for mTLS client certificates | + ### Modified Files | File | Changes | |------|---------| -| `cmd/thv-operator/api/v1alpha1/mcpserver_types.go` | Add `AuthServerClientConfig` for mTLS client certs | -| `cmd/thv-operator/controllers/mcpserver_controller.go` | Create cert-manager Certificate for proxyrunner, mount CA bundle | +| `cmd/thv-operator/api/v1alpha1/mcpexternalauthconfig_types.go` | Add `authServer` type and `AuthServerConfig` struct for mTLS client certs | +| 
`cmd/thv-operator/controllers/mcpexternalauthconfig_controller.go` | Create cert-manager Certificate for proxyrunner when `type: authServer` | +| `cmd/thv-operator/controllers/mcpserver_controller.go` | Mount CA bundle and client cert Secret from MCPExternalAuthConfig | | `pkg/networking/http_client.go` | Add `WithClientCertificate()` to HttpClientBuilder | | `pkg/authserver/server/handlers/handler.go` | Add `subjectValidator` field, update `NewHandler()`, register token exchange route | | `pkg/authserver/storage/types.go` | Add `GetUpstreamTokensForRefresh(ctx, tsid)` method (bypasses expiry check) | @@ -1550,7 +1611,7 @@ type AuthServerClientConfig struct { **Shared Types:** 1. Create `certmanager_types.go` with shared types: - - `CertManagerIssuerReference` (used by both MCPAuthServer and MCPServer) + - `CertManagerIssuerReference` (used by both MCPAuthServer and MCPExternalAuthConfig) **MCPAuthServer CRD:** @@ -1559,7 +1620,7 @@ type AuthServerClientConfig struct { - `AuthServerTLSConfig` (serverCert, clientAuth) - `ServerCertConfig` (issuerRef, duration, renewBefore) - `ClientAuthConfig` (caBundle, allowedSubjects) - - `AllowedSubjects` (organizationalUnits, commonNamePattern) + - `AllowedSubjects` (trustDomain, allowedNamespaces) - uses SPIFFE URI patterns 3. Run `make generate` and `make manifests` ### Phase 2: Authserver Server-Side mTLS @@ -1648,27 +1709,37 @@ type AuthServerClientConfig struct { 7. Test middleware with mock authserver client 8. Test fallback behavior when authserver fails -### Phase 6: MCPServer CRD Updates +### Phase 6: MCPExternalAuthConfig CRD Updates -**CRD Changes (`cmd/thv-operator/api/v1alpha1/mcpserver_types.go`):** +**CRD Changes (`cmd/thv-operator/api/v1alpha1/mcpexternalauthconfig_types.go`):** -1. Add `AuthServerClientConfig` to `MCPServerSpec` -2. Add `ClientCertificateConfig` type (uses shared `CertManagerIssuerReference`) -3. Run `make generate` and `make manifests` +1. Add `ExternalAuthTypeAuthServer` constant to enum +2. 
Add `AuthServer *AuthServerConfig` field to `MCPExternalAuthConfigSpec` +3. Add `AuthServerConfig` struct (url, clientCert, caBundle) +4. Add `ClientCertificateConfig` type (issuerRef, trustDomain for SPIFFE URIs) +5. Run `make generate` and `make manifests` + +**Controller Updates (`cmd/thv-operator/controllers/mcpexternalauthconfig_controller.go`):** + +6. When `type: authServer`, create cert-manager `Certificate` for each referencing MCPServer: + - Certificate name: `{mcpserver-name}-mtls` + - CN: `{mcpserver-name}` (human-readable for audit logs) + - URI SAN: `spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name}` + - Uses default duration (90 days) and renewBefore (15 days) +7. Track certificate readiness in status ### Phase 7: MCPServer Controller Integration **Controller Updates (`cmd/thv-operator/controllers/mcpserver_controller.go`):** -1. Check for `authServerClientConfig` in spec -2. If `clientCert` specified: - - Create cert-manager `Certificate` resource - - Wait for certificate to be ready -3. Mount client cert Secret and CA bundle ConfigMap to proxyrunner pod: +1. When `externalAuthConfigRef` references an MCPExternalAuthConfig with `type: authServer`: + - Resolve the MCPExternalAuthConfig + - Get the client cert Secret name for this MCPServer +2. Mount client cert Secret and CA bundle ConfigMap to proxyrunner pod: - Add volume from Secret (client cert) - Add volume from ConfigMap (CA bundle) - Add volumeMounts to `/etc/toolhive/authserver-mtls` and `/etc/toolhive/authserver-ca` -4. Configure runconfig with authserver client paths +3. 
Configure runconfig with authserver client paths ### Phase 8: Authserver Service Binary diff --git a/rfcs/THV-0028-standalone-auth-server-overview.md b/rfcs/THV-0028-standalone-auth-server-overview.md index 9948369..1629ed0 100644 --- a/rfcs/THV-0028-standalone-auth-server-overview.md +++ b/rfcs/THV-0028-standalone-auth-server-overview.md @@ -9,7 +9,7 @@ ## Summary -This RFC proposes deploying `pkg/authserver` as a standalone Kubernetes service with mutual TLS (mTLS) authentication between the authserver and proxyrunner components. The design introduces a new `MCPAuthServer` CRD, extends the `MCPServer` CRD for mTLS client configuration, and implements a secure token exchange flow where proxyrunners can retrieve upstream IDP tokens from the authserver. +This RFC proposes deploying `pkg/authserver` as a standalone Kubernetes service with mutual TLS (mTLS) authentication between the authserver and proxyrunner components. The design introduces a new `MCPAuthServer` CRD, adds a new `authServer` type to `MCPExternalAuthConfig` for mTLS client configuration, and implements a secure token exchange flow where proxyrunners can retrieve upstream IDP tokens from the authserver. Client certificate identity uses SPIFFE URIs for standardized workload identification. ## Problem Statement @@ -134,11 +134,12 @@ spec: configMapRef: name: toolhive-mtls-ca-bundle key: ca.crt + # Uses SPIFFE URI format: spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name} allowedSubjects: - organizationalUnits: + trustDomain: "toolhive.local" + allowedNamespaces: - "toolhive-system" - "mcp-servers" - commonNamePattern: "^[a-z0-9-]+\\.proxyrunner\\.[a-z0-9-]+\\.toolhive\\.local$" ``` The MCPAuthServer controller creates: @@ -148,9 +149,38 @@ The MCPAuthServer controller creates: 4. **ConfigMap** for runtime configuration 5. 
**ServiceAccount** for RBAC -#### MCPServer CRD Updates +#### MCPExternalAuthConfig Updates (authServer type) -Add `authServerClientConfig` to configure mTLS client certificates for proxyrunners: +Add a new `authServer` type to `MCPExternalAuthConfig` for configuring mTLS client certificates. This leverages the existing external auth config pattern and allows the same authserver configuration to be shared across multiple MCPServer resources. + +**MCPExternalAuthConfig for authServer:** + +```yaml +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: MCPExternalAuthConfig +metadata: + name: main-authserver-client + namespace: mcp-servers +spec: + type: authServer + + authServer: + url: "https://mcp-authserver.toolhive-system.svc.cluster.local" + + # Controller creates cert-manager Certificate with SPIFFE URI SAN + clientCert: + trustDomain: "toolhive.local" + issuerRef: + name: toolhive-mtls-ca + kind: ClusterIssuer + + caBundle: + configMapRef: + name: toolhive-mtls-ca-bundle + key: ca.crt +``` + +**MCPServer referencing the authServer config:** ```yaml apiVersion: toolhive.stacklok.dev/v1alpha1 @@ -168,20 +198,15 @@ spec: issuer: "https://mcp-authserver.toolhive-system.svc.cluster.local" audience: "github-tools" - authServerClientConfig: - url: "https://mcp-authserver.toolhive-system.svc.cluster.local" - clientCert: - issuerRef: - name: toolhive-mtls-ca - kind: ClusterIssuer - duration: "2160h" - renewBefore: "360h" - caBundle: - configMapRef: - name: toolhive-mtls-ca-bundle - key: ca.crt + # Reference the MCPExternalAuthConfig for authserver mTLS + externalAuthConfigRef: + name: main-authserver-client ``` +The controller generates a cert-manager Certificate with: +- **CN**: `{mcpserver-name}` (human-readable for audit logs) +- **URI SAN**: `spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name}` + #### OAuth Discovery Flow (RFC 9728) Clients discover the authserver through the proxyrunner's protected resource metadata: @@ -210,13 +235,16 @@ When a proxyrunner needs 
upstream IDP tokens (e.g., to call a GitHub API): | Component | Changes | |-----------|---------| | `pkg/networking/http_client.go` | Add `WithClientCertificate()` method to `HttpClientBuilder` | -| `pkg/authserver/server/handlers/` | Add token exchange endpoint, subject validator, mTLS identity extraction | +| `pkg/authserver/server/handlers/` | Add token exchange endpoint, SPIFFE ID validator, mTLS identity extraction | | `pkg/authserver/storage/` | Add `GetUpstreamTokensForRefresh()` method | | `pkg/auth/authserver/` | New package for proxyrunner authserver client with caching | | `pkg/runner/config.go` | Add `AuthServerConfig` struct | | `cmd/thv-authserver/` | New service binary entry point | -| `cmd/thv-operator/api/v1alpha1/` | New CRD types for MCPAuthServer and shared cert-manager types | -| `cmd/thv-operator/controllers/` | New MCPAuthServer controller, updates to MCPServer controller | +| `cmd/thv-operator/api/v1alpha1/` | New MCPAuthServer CRD, shared cert-manager types, `authServer` type added to MCPExternalAuthConfig | +| `cmd/thv-operator/controllers/` | New MCPAuthServer controller, updates to MCPExternalAuthConfig and MCPServer controllers | + +**New Dependency:** +- `github.com/spiffe/go-spiffe/v2/spiffeid`: SPIFFE ID parsing and validation for mTLS client certificates #### API Changes @@ -274,8 +302,8 @@ func (b *HttpClientBuilder) WithClientCertificate(certPath, keyPath string) *Htt - **Proxyrunner → Authserver**: mTLS with per-MCPServer client certificates - **Client → Authserver**: OAuth 2.0 with PKCE -- **Certificate Identity**: CN includes MCPServer name and namespace; OU contains namespace for access control -- **Subject Validation**: Configurable `allowedSubjects` restricts which namespaces/certificates can connect +- **Certificate Identity**: SPIFFE URI SAN (`spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name}`) provides standardized workload identity; CN is human-readable for audit logs +- **Subject Validation**: Configurable
`allowedSubjects` restricts access by trust domain and allowed namespaces ### Data Security @@ -326,8 +354,9 @@ func (b *HttpClientBuilder) WithClientCertificate(certPath, keyPath string) *Htt ### Backward Compatibility -- Existing MCPServer deployments without `authServerClientConfig` continue to work unchanged -- The `authServerClientConfig` field is optional +- Existing MCPServer deployments without `externalAuthConfigRef` continue to work unchanged +- The `externalAuthConfigRef` field is optional +- Existing MCPExternalAuthConfig types (tokenExchange, headerInjection, bearerToken, unauthenticated) continue to work - Existing proxyrunner OIDC validation continues to work (can point directly to upstream IDP or to authserver) ### Forward Compatibility @@ -336,57 +365,6 @@ func (b *HttpClientBuilder) WithClientCertificate(certPath, keyPath string) *Htt - Certificate configuration extensible to support additional issuer types - Token exchange endpoint follows RFC 8693 patterns for future scope expansion -## Implementation Plan - -### Phase 1: MCPAuthServer CRD and Shared Types -- Create shared `CertManagerIssuerReference` type -- Create MCPAuthServer CRD types -- Run code generation - -### Phase 2: Authserver Server-Side mTLS -- Implement `SubjectValidator` for certificate validation -- Implement `extractProxyRunnerIdentity()` for mTLS identity extraction -- Implement `TokenExchangeHandler` endpoint -- Add storage methods for token refresh - -### Phase 3: HttpClientBuilder mTLS Support -- Add `WithClientCertificate()` method -- Update `Build()` to configure client certificates - -### Phase 4: ProxyRunner Authserver Client -- Create `AuthServerClient` with mTLS configuration -- Implement `TokenCache` for caching exchanged tokens -- Implement token exchange logic - -### Phase 5: RunConfig and Middleware Integration -- Extend `RunConfig` with authserver configuration -- Update token exchange middleware to use authserver client - -### Phase 6: MCPServer CRD Updates -- 
Add `AuthServerClientConfig` to MCPServerSpec -- Add `ClientCertificateConfig` type - -### Phase 7: MCPServer Controller Integration -- Create cert-manager Certificate for proxyrunner client certs -- Mount certificates and CA bundle to pods -- Configure runconfig with certificate paths - -### Phase 8: Authserver Service Binary -- Create `cmd/thv-authserver/` entry point -- Implement HTTP server with mTLS -- Add health/readiness endpoints -- Create Dockerfile and build configuration - -### Phase 9: MCPAuthServer Controller -- Implement controller reconciliation logic -- Create Deployment, Service, ConfigMap resources -- Handle certificate readiness - -### Phase 10: Integration and E2E Testing -- mTLS handshake tests -- Token exchange flow tests -- Full end-to-end tests with cert-manager - ## Testing Strategy - **Unit tests**: SubjectValidator, identity extraction, token cache, HTTP client builder @@ -410,9 +388,11 @@ func (b *HttpClientBuilder) WithClientCertificate(certPath, keyPath string) *Htt - [RFC 9728: OAuth 2.0 Protected Resource Metadata](https://datatracker.ietf.org/doc/html/rfc9728) - [RFC 8693: OAuth 2.0 Token Exchange](https://datatracker.ietf.org/doc/html/rfc8693) +- [SPIFFE: Secure Production Identity Framework for Everyone](https://spiffe.io/) +- [go-spiffe Library](https://github.com/spiffe/go-spiffe) - [cert-manager Documentation](https://cert-manager.io/docs/) - [Fosite OAuth2 Framework](https://github.com/ory/fosite) -- [THV-00XX-standalone-auth-server-design.md](THV-00XX-standalone-auth-server-design.md) - Detailed design document +- [THV-0028-standalone-auth-server-design.md](THV-0028-standalone-auth-server-design.md) - Detailed design document --- From b9f2196511ff8b17abbdb3b47da615c49f93ec69 Mon Sep 17 00:00:00 2001 From: Trey Date: Mon, 26 Jan 2026 12:02:30 -0800 Subject: [PATCH 7/7] Additional feedback - Support multiple signing keys - Scope `allowedSubjects` down to MCP server names (optional) - Support `upstreamIdps` in MCPAuthServer (for 
future multiple Idp support) - Complete `MCPAuthServer` go types code snippet --- .../THV-0028-standalone-auth-server-design.md | 174 ++++++++++++++++-- ...HV-0028-standalone-auth-server-overview.md | 39 ++-- 2 files changed, 179 insertions(+), 34 deletions(-) diff --git a/rfcs/THV-0028-standalone-auth-server-design.md b/rfcs/THV-0028-standalone-auth-server-design.md index b193151..c8c9f0a 100644 --- a/rfcs/THV-0028-standalone-auth-server-design.md +++ b/rfcs/THV-0028-standalone-auth-server-design.md @@ -58,20 +58,31 @@ spec: replicas: 2 port: 8443 - upstreamIdp: - type: oidc - oidc: - issuer: "https://accounts.google.com" - clientId: "..." - clientSecretRef: - name: authserver-secrets - key: oidc-client-secret - - signingKey: - secretRef: - name: authserver-signing-key - key: private.pem - algorithm: RS256 + # Upstream identity providers for user authentication + # Currently supports a single IDP; multiple IDPs planned for vMCP use case + upstreamIdps: + - name: google # Unique identifier for this IDP + type: oidc + oidc: + issuer: "https://accounts.google.com" + clientId: "..." 
+ clientSecretRef: + name: authserver-secrets + key: oidc-client-secret + + # Signing keys for JWT issuance and JWKS endpoint + # First key is the active signing key; subsequent keys are advertised on JWKS for rotation + # Key IDs (kid) are computed using RFC 7638 thumbprints + signingKeys: + - secretRef: + name: authserver-signing-key + key: private.pem + algorithm: RS256 + # Previous key still advertised on JWKS during rotation period + # - secretRef: + # name: authserver-signing-key-old + # key: private.pem + # algorithm: RS256 tls: # Issuer for server certificate (controller creates the Certificate resource) @@ -95,16 +106,119 @@ spec: allowedSubjects: # Trust domain for SPIFFE URIs (required) trustDomain: "toolhive.local" - # Allow proxyrunners from specific namespaces + # Allow proxyrunners from specific namespaces (optional) allowedNamespaces: - "toolhive-system" - "mcp-servers" - "mcp-production" + # Allow only specific MCPServer names (optional) + # If not specified, all MCPServers in allowed namespaces are permitted + allowedNames: + - "github-tools" + - "slack-bot" ``` -**MCPAuthServer CRD mTLS Types:** +**MCPAuthServer CRD Types:** ```go +// MCPAuthServerSpec defines the desired state of MCPAuthServer +type MCPAuthServerSpec struct { + // Issuer is the OAuth 2.0/OIDC issuer URL for this authserver + // This is the base URL used in token "iss" claims and discovery endpoints + // +kubebuilder:validation:Required + Issuer string `json:"issuer"` + + // Replicas is the number of authserver pod replicas + // +kubebuilder:default=1 + // +optional + Replicas *int32 `json:"replicas,omitempty"` + + // Port is the HTTPS port for the authserver (default: 8443) + // +kubebuilder:default=8443 + // +optional + Port int32 `json:"port,omitempty"` + + // UpstreamIdps configures upstream identity providers for user authentication + // The authserver federates authentication to these IDPs + // Currently only a single IDP is supported; multiple IDPs planned for vMCP use 
case + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=1 + // +kubebuilder:validation:Required + UpstreamIdps []UpstreamIdpConfig `json:"upstreamIdps"` + + // SigningKeys configures JWT signing keys for the authserver + // The first key in the list is the active signing key used for new tokens + // Subsequent keys are included in the JWKS endpoint to support key rotation + // This allows clients to verify tokens signed with previous keys during rotation + // Key IDs (kid) are computed using RFC 7638 JWK Thumbprints + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:Required + SigningKeys []SigningKeyConfig `json:"signingKeys"` + + // TLS configures TLS and mTLS for the authserver + // +kubebuilder:validation:Required + TLS AuthServerTLSConfig `json:"tls"` +} + +// UpstreamIdpType represents the type of upstream identity provider +type UpstreamIdpType string + +const ( + // UpstreamIdpTypeOIDC is for OpenID Connect providers + UpstreamIdpTypeOIDC UpstreamIdpType = "oidc" +) + +// UpstreamIdpConfig configures an upstream identity provider for user authentication +type UpstreamIdpConfig struct { + // Name is a unique identifier for this IDP within the MCPAuthServer + // Used to reference the IDP in logs and potentially in multi-IDP routing (future) + // +kubebuilder:validation:Required + // +kubebuilder:validation:Pattern=`^[a-z0-9]([a-z0-9-]*[a-z0-9])?$` + Name string `json:"name"` + + // Type is the type of identity provider + // +kubebuilder:validation:Enum=oidc + // +kubebuilder:validation:Required + Type UpstreamIdpType `json:"type"` + + // OIDC configures an OpenID Connect identity provider + // Required when Type is "oidc" + // +optional + OIDC *OIDCIdpConfig `json:"oidc,omitempty"` +} + +// OIDCIdpConfig configures an OIDC identity provider +type OIDCIdpConfig struct { + // Issuer is the OIDC issuer URL (e.g., "https://accounts.google.com") + // +kubebuilder:validation:Required + Issuer string `json:"issuer"` + + 
// ClientID is the OAuth 2.0 client identifier + // +kubebuilder:validation:Required + ClientID string `json:"clientId"` + + // ClientSecretRef references a Kubernetes Secret containing the client secret + // +kubebuilder:validation:Required + ClientSecretRef SecretKeyRef `json:"clientSecretRef"` + + // Scopes is the list of OAuth 2.0 scopes to request (default: ["openid", "profile", "email"]) + // +optional + Scopes []string `json:"scopes,omitempty"` +} + +// SigningKeyConfig configures a JWT signing key for the authserver +// The key ID (kid) is computed using RFC 7638 JWK Thumbprint +type SigningKeyConfig struct { + // SecretRef references a Kubernetes Secret containing the private key + // +kubebuilder:validation:Required + SecretRef SecretKeyRef `json:"secretRef"` + + // Algorithm is the JWT signing algorithm to use with this key + // +kubebuilder:validation:Enum=RS256;RS384;RS512;ES256;ES384;ES512;EdDSA + // +kubebuilder:validation:Required + Algorithm string `json:"algorithm"` +} + // AuthServerTLSConfig configures TLS and mTLS for the authserver type AuthServerTLSConfig struct { // ServerCert configures automatic server certificate provisioning @@ -160,6 +274,12 @@ type AllowedSubjects struct { // If empty, all namespaces are allowed (only trustDomain is validated) // +optional AllowedNamespaces []string `json:"allowedNamespaces,omitempty"` + + // AllowedNames is a list of MCPServer names that are allowed to connect + // When specified, only MCPServers with matching names are permitted + // If empty, all MCPServer names are allowed (subject to namespace restrictions) + // +optional + AllowedNames []string `json:"allowedNames,omitempty"` } ``` @@ -979,6 +1099,8 @@ type SubjectValidator struct { trustDomain spiffeid.TrustDomain // allowedNamespaces is a set of allowed Kubernetes namespaces (nil means all allowed) allowedNamespaces map[string]bool + // allowedNames is a set of allowed MCPServer names (nil means all allowed) + allowedNames map[string]bool } // 
NewSubjectValidator creates a validator from MCPAuthServer CRD configuration @@ -1006,6 +1128,14 @@ func NewSubjectValidator(allowedSubjects *AllowedSubjects) (*SubjectValidator, e } } + // Build allowed names set + if len(allowedSubjects.AllowedNames) > 0 { + validator.allowedNames = make(map[string]bool, len(allowedSubjects.AllowedNames)) + for _, name := range allowedSubjects.AllowedNames { + validator.allowedNames[name] = true + } + } + return validator, nil } @@ -1015,6 +1145,7 @@ func NewSubjectValidator(allowedSubjects *AllowedSubjects) (*SubjectValidator, e // Validates: // 1. SPIFFE trust domain matches the configured trust domain // 2. Namespace is in the allowed list (if configured) +// 3. MCPServer name is in the allowed list (if configured) // // Returns nil if allowed, error if rejected. func (v *SubjectValidator) validateSubjectAllowed(identity *ProxyRunnerIdentity) error { @@ -1036,6 +1167,13 @@ func (v *SubjectValidator) validateSubjectAllowed(identity *ProxyRunnerIdentity) } } + // Check name restriction (if configured) + if v.allowedNames != nil { + if !v.allowedNames[identity.Name] { + return fmt.Errorf("MCPServer name %q is not in allowed list", identity.Name) + } + } + return nil } @@ -1616,11 +1754,11 @@ type AuthServerClientConfig struct { **MCPAuthServer CRD:** 2. Create `mcpauthserver_types.go` with CRD types: - - `MCPAuthServerSpec` (issuer, replicas, upstreamIdp, signingKey, tls) + - `MCPAuthServerSpec` (issuer, replicas, upstreamIdps, signingKeys, tls) - `AuthServerTLSConfig` (serverCert, clientAuth) - `ServerCertConfig` (issuerRef, duration, renewBefore) - `ClientAuthConfig` (caBundle, allowedSubjects) - - `AllowedSubjects` (trustDomain, allowedNamespaces) - uses SPIFFE URI patterns + - `AllowedSubjects` (trustDomain, allowedNamespaces, allowedNames) - uses SPIFFE URI patterns 3. 
Run `make generate` and `make manifests` ### Phase 2: Authserver Server-Side mTLS diff --git a/rfcs/THV-0028-standalone-auth-server-overview.md b/rfcs/THV-0028-standalone-auth-server-overview.md index 1629ed0..aff2166 100644 --- a/rfcs/THV-0028-standalone-auth-server-overview.md +++ b/rfcs/THV-0028-standalone-auth-server-overview.md @@ -106,20 +106,25 @@ spec: replicas: 2 port: 8443 - upstreamIdp: - type: oidc - oidc: - issuer: "https://accounts.google.com" - clientId: "..." - clientSecretRef: - name: authserver-secrets - key: oidc-client-secret - - signingKey: - secretRef: - name: authserver-signing-key - key: private.pem - algorithm: RS256 + # Upstream identity providers (single IDP supported now; multiple planned for vMCP) + upstreamIdps: + - name: google + type: oidc + oidc: + issuer: "https://accounts.google.com" + clientId: "..." + clientSecretRef: + name: authserver-secrets + key: oidc-client-secret + + # Signing keys for JWT issuance and JWKS endpoint + # First key is active; subsequent keys advertised on JWKS for rotation + # Key IDs (kid) computed using RFC 7638 thumbprints + signingKeys: + - secretRef: + name: authserver-signing-key + key: private.pem + algorithm: RS256 tls: serverCert: @@ -137,9 +142,11 @@ spec: # Uses SPIFFE URI format: spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name} allowedSubjects: trustDomain: "toolhive.local" - allowedNamespaces: + allowedNamespaces: # Optional: restrict by namespace - "toolhive-system" - "mcp-servers" + allowedNames: # Optional: restrict to specific MCPServer names + - "github-tools" ``` The MCPAuthServer controller creates: @@ -303,7 +310,7 @@ func (b *HttpClientBuilder) WithClientCertificate(certPath, keyPath string) *Htt - **Proxyrunner → Authserver**: mTLS with per-MCPServer client certificates - **Client → Authserver**: OAuth 2.0 with PKCE - **Certificate Identity**: SPIFFE URI SAN (`spiffe://{trustDomain}/ns/{namespace}/mcpserver/{name}`) provides standardized workload identity; CN is human-readable 
for audit logs -- **Subject Validation**: Configurable `allowedSubjects` restricts access by trust domain and allowed namespaces +- **Subject Validation**: Configurable `allowedSubjects` restricts access by trust domain, allowed namespaces, and optionally specific MCPServer names ### Data Security