Skip to content

Commit ee5f0ef

Browse files
scotwellsclaude
andcommitted
fix: remove WD watch predicate that wedged new WorkloadDeployments
The #37 predicate (wdReferencedDataChangedPredicate) on the WorkloadDeployment For() watch dropped metadata-only Update events. A new WD's first reconcile adds the workload-controller finalizer and returns; that finalizer-add produces an Update with no generation bump and no ReferencedDataReady change, so the predicate filtered it and the WD never got a second reconcile — wedging every new WD at the finalizer stage until a controller restart. Remove the predicate entirely. The equality.Semantic.DeepEqual guard before Status().Update (added in the same #37 change) already prevents the self-trigger loop the predicate was meant to stop, and watching all updates lets new WDs proceed past the finalizer stage immediately. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> (cherry picked from commit b94a40a)
1 parent a3ae91e commit ee5f0ef

2 files changed

Lines changed: 11 additions & 362 deletions

File tree

internal/controller/workloaddeployment_controller.go

Lines changed: 11 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@ import (
1818
"sigs.k8s.io/controller-runtime/pkg/client"
1919
"sigs.k8s.io/controller-runtime/pkg/cluster"
2020
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
21-
"sigs.k8s.io/controller-runtime/pkg/event"
2221
"sigs.k8s.io/controller-runtime/pkg/finalizer"
2322
"sigs.k8s.io/controller-runtime/pkg/handler"
2423
"sigs.k8s.io/controller-runtime/pkg/log"
25-
"sigs.k8s.io/controller-runtime/pkg/predicate"
2624
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2725
mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder"
2826
mccontext "sigs.k8s.io/multicluster-runtime/pkg/context"
@@ -238,12 +236,10 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco
238236
apimeta.SetStatusCondition(&deployment.Status.Conditions, availCond)
239237
}
240238

241-
// Skip the write when the status is unchanged. Without this guard the
242-
// reconciler's own Status().Update would always produce a new resourceVersion,
243-
// firing another Update event on the WD and creating an infinite reconcile loop
244-
// before the predicate on For() was added. The guard is a belt-and-suspenders
245-
// complement to the predicate: the predicate prevents re-queuing on own writes,
246-
// and this guard avoids the superfluous API call entirely.
239+
// Skip the write when the status is unchanged. The unfiltered For() watch
240+
// re-enqueues this deployment on every status write, so an unconditional
241+
// Status().Update would re-trigger the reconciler on its own writes; this
242+
// guard breaks that loop and avoids the superfluous API call entirely.
247243
if !equality.Semantic.DeepEqual(existingStatus, deployment.Status) {
248244
if err := cl.GetClient().Status().Update(ctx, &deployment); err != nil {
249245
return ctrl.Result{}, fmt.Errorf("failed updating deployment status: %w", err)
@@ -313,68 +309,6 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates(
313309
return currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, referencedDataBlockedReplicas, nil
314310
}
315311

316-
// wdReferencedDataChangedPredicate returns a predicate for the WorkloadDeployment
317-
// For() watch that fires on:
318-
// - Any Create, Delete, or Generic event (always enqueue).
319-
// - An Update event where metadata.generation changed (spec updated), OR where
320-
// the ReferencedDataReady condition's Status, Reason, or Message changed.
321-
//
322-
// The predicate intentionally does NOT fire when only the Available or
323-
// ReplicasReady conditions change, because those are written by this reconciler
324-
// itself. Without this guard the reconciler's own Status().Update would re-enqueue
325-
// itself on every run, creating a tight reconcile loop. The equality check before
326-
// Status().Update is a complementary guard, but the predicate is the primary
327-
// protection: it prevents re-enqueuing entirely so the workqueue stays quiet between
328-
// meaningful state transitions.
329-
//
330-
// Loop prevention: the ReferencedDataController (the only other writer of the
331-
// ReferencedDataReady condition) is the intended trigger. When it sets
332-
// ReferencedDataReady=False/SourceNotFound the predicate passes and this
333-
// reconciler re-runs, sees the resolver verdict in deployment.Status.Conditions, and
334-
// promotes Available to ReferencedDataNotReady. Subsequent runs by this reconciler
335-
// (which write Available but not ReferencedDataReady) are filtered out.
336-
func wdReferencedDataChangedPredicate() predicate.Predicate {
337-
return predicate.Funcs{
338-
UpdateFunc: func(e event.UpdateEvent) bool {
339-
oldWD, ok1 := e.ObjectOld.(*computev1alpha.WorkloadDeployment)
340-
newWD, ok2 := e.ObjectNew.(*computev1alpha.WorkloadDeployment)
341-
if !ok1 || !ok2 {
342-
return true // be conservative when type assertion fails
343-
}
344-
// Spec change: always reconcile.
345-
if oldWD.Generation != newWD.Generation {
346-
return true
347-
}
348-
// ReferencedDataReady condition changed: reconcile so Available is
349-
// updated to reflect the resolver's verdict.
350-
return wdRefDataCondChanged(
351-
apimeta.FindStatusCondition(oldWD.Status.Conditions, computev1alpha.ReferencedDataReady),
352-
apimeta.FindStatusCondition(newWD.Status.Conditions, computev1alpha.ReferencedDataReady),
353-
)
354-
},
355-
CreateFunc: func(_ event.CreateEvent) bool { return true },
356-
DeleteFunc: func(_ event.DeleteEvent) bool { return true },
357-
GenericFunc: func(_ event.GenericEvent) bool { return true },
358-
}
359-
}
360-
361-
// wdRefDataCondChanged returns true when the ReferencedDataReady condition's
362-
// observable fields (Status, Reason, Message) differ between old and new. Presence
363-
// changes (nil → non-nil or vice versa) are also treated as a change. The
364-
// LastTransitionTime field is excluded because it changes on every status flip and
365-
// would defeat the loop-prevention intent of wdReferencedDataChangedPredicate.
366-
func wdRefDataCondChanged(old, new *metav1.Condition) bool {
367-
if (old == nil) != (new == nil) {
368-
return true // condition was added or removed
369-
}
370-
if old == nil {
371-
return false // both nil — no change
372-
}
373-
return old.Status != new.Status ||
374-
old.Reason != new.Reason ||
375-
old.Message != new.Message
376-
}
377-
378312
// selectWDBlockingCondition evaluates all blocking causes for a WorkloadDeployment
379313
// that has no ready replicas and returns the Available condition reflecting the
380314
// highest-priority blocker. All causes are evaluated before selecting the winner
@@ -772,15 +706,15 @@ func (r *WorkloadDeploymentReconciler) SetupWithManager(mgr mcmanager.Manager, o
772706
}
773707

774708
b := mcbuilder.ControllerManagedBy(mgr).
775-
// The predicate gates re-enqueuing on meaningful WD changes: spec updates
776-
// (generation bump) or a ReferencedDataReady condition change written by
777-
// ReferencedDataController. Without it, each Status().Update by this
778-
// reconciler (writing Available/ReplicasReady) would re-enqueue itself,
779-
// creating a tight loop and delaying the ReferencedDataReady signal from
780-
// the resolver.
709+
// Watch all WorkloadDeployment events. The reconciler's own Status().Update
710+
// cannot create a self-trigger loop because the equality.Semantic.DeepEqual
711+
// guard skips the write when nothing changed, so no self-event is produced.
712+
// We deliberately do NOT filter Update events with a predicate: an earlier
713+
// predicate that only passed generation/ReferencedDataReady changes dropped
714+
// metadata-only updates such as the initial finalizer-add, which wedged new
715+
// WorkloadDeployments until a controller restart.
781716
For(&computev1alpha.WorkloadDeployment{},
782717
mcbuilder.WithEngageWithLocalCluster(false),
783-
mcbuilder.WithPredicates(wdReferencedDataChangedPredicate()),
784718
).
785719
Owns(&computev1alpha.Instance{})
786720

0 commit comments

Comments
 (0)