@@ -49,6 +49,15 @@ const (
4949 defaultNamespace = "default"
5050 // serviceFieldManager is the field manager name for server-side apply operations
5151 serviceFieldManager = "toolhive-container-manager"
52+
53+ // RunConfigMCPServerGenerationAnnotation carries the MCPServer .metadata.generation that
54+ // produced the RunConfig applied to this StatefulSet. Used as a monotonic version stamp
55+ // to prevent stale proxyrunner pods (from an old Deployment ReplicaSet) from clobbering
56+ // a newer RunConfig's apply. The gate only becomes effective once proxyrunner is upgraded
57+ // to a version that reads this annotation; operator-only upgrades leave the race window
58+ // in place until proxyrunner is also rolled. Exported because it forms a wire contract
59+ // that external readers (operator, diagnostic tooling) may consume.
60+ RunConfigMCPServerGenerationAnnotation = "toolhive.stacklok.dev/mcpserver-generation"
5261)
5362
5463// RuntimeName is the name identifier for the Kubernetes runtime
@@ -397,22 +406,26 @@ func (c *Client) DeployWorkload(ctx context.Context,
397406 return 0 , err
398407 }
399408
400- // Create an apply configuration for the statefulset
401- statefulSetApply := appsv1apply .StatefulSet (containerName , namespace ).
402- WithLabels (containerLabels ).
403- WithSpec (buildStatefulSetSpec (containerName , podTemplateSpec , options ))
404-
405- // Apply the statefulset using server-side apply
406- createdStatefulSet , err := c .client .AppsV1 ().StatefulSets (namespace ).
407- Apply (ctx , statefulSetApply , metav1.ApplyOptions {
408- FieldManager : serviceFieldManager ,
409- Force : true ,
410- })
409+ ourGen := runConfigGeneration (options )
410+ skip , err := c .shouldSkipStatefulSetApply (ctx , namespace , containerName , ourGen )
411411 if err != nil {
412- return 0 , fmt .Errorf ("failed to apply statefulset: %w" , err )
412+ return 0 , err
413+ }
414+ if skip {
415+ // Intentionally skip ensureBackendServices in the gated path: this pod's RunConfig
416+ // is stale, so reconciling services here would clobber port/config fields set by
417+ // the newer-generation pod under the same field manager + Force: true — the same
418+ // race this gate prevents for the StatefulSet. The newer pod already reconciled
419+ // services; if that failed, it returns an error and retries on its own.
420+ return 0 , nil
413421 }
414422
415- slog .Debug ("applied statefulset" , "name" , createdStatefulSet .Name )
423+ createdStatefulSet , err := c .applyStatefulSet (
424+ ctx , namespace , containerName , containerLabels , podTemplateSpec , options , ourGen ,
425+ )
426+ if err != nil {
427+ return 0 , err
428+ }
416429
417430 err = c .ensureBackendServices (
418431 ctx , containerName , namespace , containerLabels , transportType , options , createdStatefulSet )
@@ -435,6 +448,84 @@ func (c *Client) DeployWorkload(ctx context.Context,
435448 return 0 , nil
436449}
437450
451+ // runConfigGeneration extracts the RunConfig MCPServer generation from options,
452+ // returning 0 when options is nil (backward-compat / non-operator callers).
453+ func runConfigGeneration (options * runtime.DeployWorkloadOptions ) int64 {
454+ if options == nil {
455+ return 0
456+ }
457+ return options .RunConfigMCPServerGeneration
458+ }
459+
460+ // applyStatefulSet stamps the MCPServer generation annotation when non-zero,
461+ // builds the StatefulSet apply configuration, and performs the server-side apply.
462+ func (c * Client ) applyStatefulSet (
463+ ctx context.Context ,
464+ namespace , containerName string ,
465+ containerLabels map [string ]string ,
466+ podTemplateSpec * corev1apply.PodTemplateSpecApplyConfiguration ,
467+ options * runtime.DeployWorkloadOptions ,
468+ ourGen int64 ,
469+ ) (* appsv1.StatefulSet , error ) {
470+ if ourGen > 0 {
471+ podTemplateSpec = podTemplateSpec .WithAnnotations (map [string ]string {
472+ RunConfigMCPServerGenerationAnnotation : strconv .FormatInt (ourGen , 10 ),
473+ })
474+ }
475+ statefulSetApply := appsv1apply .StatefulSet (containerName , namespace ).
476+ WithLabels (containerLabels ).
477+ WithSpec (buildStatefulSetSpec (containerName , podTemplateSpec , options ))
478+ createdStatefulSet , err := c .client .AppsV1 ().StatefulSets (namespace ).
479+ Apply (ctx , statefulSetApply , metav1.ApplyOptions {
480+ FieldManager : serviceFieldManager ,
481+ Force : true ,
482+ })
483+ if err != nil {
484+ return nil , fmt .Errorf ("failed to apply statefulset: %w" , err )
485+ }
486+ slog .Debug ("applied statefulset" , "name" , createdStatefulSet .Name )
487+ return createdStatefulSet , nil
488+ }
489+
490+ // shouldSkipStatefulSetApply returns true when the existing StatefulSet is already
491+ // stamped with a strictly greater MCPServer generation than ours, meaning a newer
492+ // proxyrunner pod has already reconciled the workload and ours would be a regression.
493+ // Returns false (apply as normal) when ourGen is zero or negative, when the StatefulSet
494+ // does not yet exist, when the annotation is absent, or when the annotation is unparsable.
495+ func (c * Client ) shouldSkipStatefulSetApply (
496+ ctx context.Context , namespace , name string , ourGen int64 ,
497+ ) (bool , error ) {
498+ if ourGen <= 0 {
499+ return false , nil
500+ }
501+ existing , err := c .client .AppsV1 ().StatefulSets (namespace ).Get (ctx , name , metav1.GetOptions {})
502+ if err != nil {
503+ if errors .IsNotFound (err ) {
504+ return false , nil
505+ }
506+ return false , fmt .Errorf ("failed to get existing statefulset: %w" , err )
507+ }
508+ if existing .Spec .Template .Annotations == nil {
509+ return false , nil
510+ }
511+ theirs := existing .Spec .Template .Annotations [RunConfigMCPServerGenerationAnnotation ]
512+ if theirs == "" {
513+ return false , nil
514+ }
515+ theirsGen , parseErr := strconv .ParseInt (theirs , 10 , 64 )
516+ if parseErr != nil {
517+ slog .Warn ("unparsable mcpserver-generation annotation; proceeding with apply" ,
518+ "sts" , name , "value" , theirs , "err" , parseErr )
519+ return false , nil
520+ }
521+ if theirsGen > ourGen {
522+ slog .Debug ("skipping StatefulSet apply; newer MCPServer generation already applied" ,
523+ "sts" , name , "ours" , ourGen , "theirs" , theirsGen )
524+ return true , nil
525+ }
526+ return false , nil
527+ }
528+
438529// buildStatefulSetSpec constructs the StatefulSet spec apply configuration.
439530// WithReplicas is only included when BackendReplicas is explicitly set; omitting
440531// the field lets the existing field manager (e.g. HPA or kubectl) retain control
0 commit comments