Skip to content

Commit 13043a9

Browse files
stuggiclaude
andcommitted
Fix MinorUpdateOVNDataplane stuck when reconciles race
When the OVN image is unchanged between versions, the version controller uses a condition-based state machine to track whether an OVN dataplane deployment was observed running. When multiple reconciles race at the same timestamp, the condition state from the previous reconcile may not have persisted yet, causing the state machine to reset and loop indefinitely on "Waiting for OVN Dataplane deployment to be created". Add IsDataplaneDeploymentCompletedForServiceType() to check for a completed deployment with the target version for a given service type. Use it as a fallback when the saved condition state doesn't show a previously observed deployment — if a completed deployment exists, proceed instead of waiting forever. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-off-by: Martin Schuppert <mschuppert@redhat.com>
1 parent d84076f commit 13043a9

4 files changed

Lines changed: 423 additions & 18 deletions

File tree

internal/controller/core/openstackversion_controller.go

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,13 @@ import (
2626
"k8s.io/apimachinery/pkg/runtime"
2727
"k8s.io/client-go/kubernetes"
2828
ctrl "sigs.k8s.io/controller-runtime"
29+
"sigs.k8s.io/controller-runtime/pkg/builder"
2930
"sigs.k8s.io/controller-runtime/pkg/client"
3031
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
32+
"sigs.k8s.io/controller-runtime/pkg/event"
3133
"sigs.k8s.io/controller-runtime/pkg/handler"
3234
"sigs.k8s.io/controller-runtime/pkg/log"
35+
"sigs.k8s.io/controller-runtime/pkg/predicate"
3336
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3437

3538
"github.com/go-logr/logr"
@@ -82,6 +85,8 @@ func (r *OpenStackVersionReconciler) GetLogger(ctx context.Context) logr.Logger
8285
// +kubebuilder:rbac:groups=core.openstack.org,resources=openstackversions/finalizers,verbs=update;patch
8386
// +kubebuilder:rbac:groups=core.openstack.org,resources=openstackcontrolplanes,verbs=get;list;watch
8487
// +kubebuilder:rbac:groups=dataplane.openstack.org,resources=openstackdataplanenodesets,verbs=get;list;watch
88+
// +kubebuilder:rbac:groups=dataplane.openstack.org,resources=openstackdataplanedeployments,verbs=get;list;watch
89+
// +kubebuilder:rbac:groups=dataplane.openstack.org,resources=openstackdataplaneservices,verbs=get;list;watch
8590

8691
// Reconcile is part of the main kubernetes reconciliation loop which aims to
8792
// move the current state of the cluster closer to the desired state.
@@ -344,16 +349,24 @@ func (r *OpenStackVersionReconciler) Reconcile(ctx context.Context, req ctrl.Req
344349
prevOvnDataplaneCond := savedConditions.Get(corev1beta1.OpenStackVersionMinorUpdateOVNDataplane)
345350
if prevOvnDataplaneCond == nil ||
346351
prevOvnDataplaneCond.Reason != condition.RequestedReason {
347-
// We have never observed a running OVN deployment in
348-
// this update cycle — the deployment has not been
349-
// created yet. Keep waiting.
350-
instance.Status.Conditions.Set(condition.FalseCondition(
351-
corev1beta1.OpenStackVersionMinorUpdateOVNDataplane,
352-
condition.InitReason,
353-
condition.SeverityInfo,
354-
corev1beta1.OpenStackVersionMinorUpdateReadyRunningMessage))
355-
Log.Info("Waiting for OVN Dataplane deployment to be created (OVN image unchanged between versions)")
356-
return ctrl.Result{}, nil
352+
// The saved condition doesn't show we observed a running
353+
// deployment (may be due to reconcile race). Check if a
354+
// completed deployment already exists for the target version.
355+
ovnDeploymentCompleted, err := openstack.IsDataplaneDeploymentCompletedForServiceType(
356+
ctx, versionHelper, instance.Namespace, dataplaneNodesets, "ovn", instance.Spec.TargetVersion)
357+
if err != nil {
358+
return ctrl.Result{}, err
359+
}
360+
if !ovnDeploymentCompleted {
361+
instance.Status.Conditions.Set(condition.FalseCondition(
362+
corev1beta1.OpenStackVersionMinorUpdateOVNDataplane,
363+
condition.InitReason,
364+
condition.SeverityInfo,
365+
corev1beta1.OpenStackVersionMinorUpdateReadyRunningMessage))
366+
Log.Info("Waiting for OVN Dataplane deployment to be created (OVN image unchanged between versions)")
367+
return ctrl.Result{}, nil
368+
}
369+
Log.Info("OVN Dataplane deployment completed (found completed deployment for target version)")
357370
}
358371
// Previously saw a running OVN deployment (condition was
359372
// False/RequestedReason), now no OVN deployment is running
@@ -526,9 +539,38 @@ func (r *OpenStackVersionReconciler) SetupWithManager(mgr ctrl.Manager) error {
526539
return nil
527540
})
528541

542+
// Only reconcile when a deployment's Deployed status changes,
543+
// not on every status update during deployment execution.
544+
// The dataplane controller sets Deployed and DeployedVersion
545+
// in the same status update, so checking Deployed alone is sufficient.
546+
deploymentStatusPredicate := predicate.Funcs{
547+
CreateFunc: func(_ event.CreateEvent) bool {
548+
return true
549+
},
550+
UpdateFunc: func(e event.UpdateEvent) bool {
551+
oldDepl, ok := e.ObjectOld.(*dataplanev1.OpenStackDataPlaneDeployment)
552+
if !ok {
553+
return false
554+
}
555+
newDepl, ok := e.ObjectNew.(*dataplanev1.OpenStackDataPlaneDeployment)
556+
if !ok {
557+
return false
558+
}
559+
return oldDepl.Status.Deployed != newDepl.Status.Deployed
560+
},
561+
DeleteFunc: func(_ event.DeleteEvent) bool {
562+
return false
563+
},
564+
GenericFunc: func(_ event.GenericEvent) bool {
565+
return false
566+
},
567+
}
568+
529569
return ctrl.NewControllerManagedBy(mgr).
530570
Watches(&corev1beta1.OpenStackControlPlane{}, versionFunc).
531571
Watches(&dataplanev1.OpenStackDataPlaneNodeSet{}, versionFunc).
572+
Watches(&dataplanev1.OpenStackDataPlaneDeployment{}, versionFunc,
573+
builder.WithPredicates(deploymentStatusPredicate)).
532574
For(&corev1beta1.OpenStackVersion{}).
533575
Complete(r)
534576
}

internal/openstack/dataplane.go

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,18 @@ func DataplaneNodesetsOVNControllerImagesMatch(version *corev1beta1.OpenStackVer
6060
return true
6161
}
6262

63-
// IsDataplaneDeploymentRunningForServiceType checks whether any in-progress
64-
// OpenStackDataPlaneDeployment is deploying a service with the given
65-
// EDPMServiceType (e.g. "ovn"). It resolves which services each deployment
66-
// runs (from ServicesOverride or the nodeset's service list) and inspects
67-
// the service's EDPMServiceType to determine if it matches.
68-
func IsDataplaneDeploymentRunningForServiceType(
63+
// deploymentFilter is a predicate that selects which deployments to consider.
64+
type deploymentFilter func(*dataplanev1.OpenStackDataPlaneDeployment) bool
65+
66+
// findDataplaneDeploymentForServiceType checks if any deployment matching the
67+
// filter includes the given service type for at least one nodeset with nodes.
68+
func findDataplaneDeploymentForServiceType(
6969
ctx context.Context,
7070
h *helper.Helper,
7171
namespace string,
7272
dataplaneNodesets *dataplanev1.OpenStackDataPlaneNodeSetList,
7373
serviceType string,
74+
filter deploymentFilter,
7475
) (bool, error) {
7576
// List all deployments in the namespace
7677
deployments := &dataplanev1.OpenStackDataPlaneDeploymentList{}
@@ -92,8 +93,7 @@ func IsDataplaneDeploymentRunningForServiceType(
9293
serviceCache := make(map[string]*dataplanev1.OpenStackDataPlaneService)
9394

9495
for _, deployment := range deployments.Items {
95-
// Skip completed deployments
96-
if deployment.Status.Deployed {
96+
if !filter(&deployment) {
9797
continue
9898
}
9999

@@ -139,6 +139,39 @@ func IsDataplaneDeploymentRunningForServiceType(
139139
return false, nil
140140
}
141141

142+
// IsDataplaneDeploymentRunningForServiceType checks if a non-completed
143+
// deployment exists that includes the given service type.
144+
func IsDataplaneDeploymentRunningForServiceType(
145+
ctx context.Context,
146+
h *helper.Helper,
147+
namespace string,
148+
dataplaneNodesets *dataplanev1.OpenStackDataPlaneNodeSetList,
149+
serviceType string,
150+
) (bool, error) {
151+
return findDataplaneDeploymentForServiceType(ctx, h, namespace, dataplaneNodesets, serviceType,
152+
func(d *dataplanev1.OpenStackDataPlaneDeployment) bool {
153+
return !d.Status.Deployed
154+
})
155+
}
156+
157+
// IsDataplaneDeploymentCompletedForServiceType checks if a completed deployment
158+
// exists for the given service type with the target version. This is used during
159+
// minor updates when the OVN image is unchanged between versions — the image
160+
// match alone is not sufficient, so we check for a completed deployment instead.
161+
func IsDataplaneDeploymentCompletedForServiceType(
162+
ctx context.Context,
163+
h *helper.Helper,
164+
namespace string,
165+
dataplaneNodesets *dataplanev1.OpenStackDataPlaneNodeSetList,
166+
serviceType string,
167+
targetVersion string,
168+
) (bool, error) {
169+
return findDataplaneDeploymentForServiceType(ctx, h, namespace, dataplaneNodesets, serviceType,
170+
func(d *dataplanev1.OpenStackDataPlaneDeployment) bool {
171+
return d.Status.Deployed && d.Status.DeployedVersion == targetVersion
172+
})
173+
}
174+
142175
// DataplaneNodesetsDeployed returns true if all nodesets are deployed with the latest version
143176
func DataplaneNodesetsDeployed(version *corev1beta1.OpenStackVersion, dataplaneNodesets *dataplanev1.OpenStackDataPlaneNodeSetList) bool {
144177
for _, nodeset := range dataplaneNodesets.Items {

0 commit comments

Comments
 (0)