Skip to content

Commit d147985

Browse files
scotwellsclaude
andcommitted
fix: remove WD watch predicate that wedged new WorkloadDeployments
The #37 predicate (wdReferencedDataChangedPredicate) on the WorkloadDeployment For() watch dropped metadata-only Update events. A new WD's first reconcile adds the workload-controller finalizer and returns; that finalizer-add produces an Update with no generation bump and no ReferencedDataReady change, so the predicate filtered it and the WD never got a second reconcile — wedging every new WD at the finalizer stage until a controller restart. Remove the predicate entirely. The equality.Semantic.DeepEqual guard before Status().Update (added in the same #37 change) already prevents the self-trigger loop the predicate was meant to stop, and watching all updates lets new WDs proceed past the finalizer stage immediately. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> (cherry picked from commit b94a40a)
1 parent f78d5d8 commit d147985

2 files changed

Lines changed: 7 additions & 356 deletions

File tree

internal/controller/workloaddeployment_controller.go

Lines changed: 7 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@ import (
1818
"sigs.k8s.io/controller-runtime/pkg/client"
1919
"sigs.k8s.io/controller-runtime/pkg/cluster"
2020
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
21-
"sigs.k8s.io/controller-runtime/pkg/event"
2221
"sigs.k8s.io/controller-runtime/pkg/finalizer"
2322
"sigs.k8s.io/controller-runtime/pkg/handler"
2423
"sigs.k8s.io/controller-runtime/pkg/log"
25-
"sigs.k8s.io/controller-runtime/pkg/predicate"
2624
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2725
mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder"
2826
mccontext "sigs.k8s.io/multicluster-runtime/pkg/context"
@@ -318,68 +316,6 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates(
318316
return currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, referencedDataBlockedReplicas, nil
319317
}
320318

321-
// wdReferencedDataChangedPredicate returns a predicate for the WorkloadDeployment
322-
// For() watch that fires on:
323-
// - Any Create, Delete, or Generic event (always enqueue).
324-
// - An Update event where metadata.generation changed (spec updated), OR where
325-
// the ReferencedDataReady condition's Status, Reason, or Message changed.
326-
//
327-
// The predicate intentionally does NOT fire when only the Available or
328-
// ReplicasReady conditions change, because those are written by this reconciler
329-
// itself. Without this guard the reconciler's own Status().Update would re-enqueue
330-
// itself on every run, creating a tight reconcile loop. The equality check before
331-
// Status().Update is a complementary guard, but the predicate is the primary
332-
// protection: it prevents re-enqueuing entirely so the workqueue stays quiet between
333-
// meaningful state transitions.
334-
//
335-
// Loop prevention: the ReferencedDataController (the only other writer of the
336-
// ReferencedDataReady condition) is the intended trigger. When it sets
337-
// ReferencedDataReady=False/SourceNotFound the predicate passes and this
338-
// reconciler re-runs, sees the resolver verdict in deployment.Status.Conditions, and
339-
// promotes Available to ReferencedDataNotReady. Subsequent runs by this reconciler
340-
// (which write Available but not ReferencedDataReady) are filtered out.
341-
func wdReferencedDataChangedPredicate() predicate.Predicate {
342-
return predicate.Funcs{
343-
UpdateFunc: func(e event.UpdateEvent) bool {
344-
oldWD, ok1 := e.ObjectOld.(*computev1alpha.WorkloadDeployment)
345-
newWD, ok2 := e.ObjectNew.(*computev1alpha.WorkloadDeployment)
346-
if !ok1 || !ok2 {
347-
return true // be conservative when type assertion fails
348-
}
349-
// Spec change: always reconcile.
350-
if oldWD.Generation != newWD.Generation {
351-
return true
352-
}
353-
// ReferencedDataReady condition changed: reconcile so Available is
354-
// updated to reflect the resolver's verdict.
355-
return wdRefDataCondChanged(
356-
apimeta.FindStatusCondition(oldWD.Status.Conditions, computev1alpha.ReferencedDataReady),
357-
apimeta.FindStatusCondition(newWD.Status.Conditions, computev1alpha.ReferencedDataReady),
358-
)
359-
},
360-
CreateFunc: func(_ event.CreateEvent) bool { return true },
361-
DeleteFunc: func(_ event.DeleteEvent) bool { return true },
362-
GenericFunc: func(_ event.GenericEvent) bool { return true },
363-
}
364-
}
365-
366-
// wdRefDataCondChanged returns true when the ReferencedDataReady condition's
367-
// observable fields (Status, Reason, Message) differ between old and new. Presence
368-
// changes (nil → non-nil or vice versa) are also treated as a change. The
369-
// LastTransitionTime field is excluded because it changes on every status flip and
370-
// would defeat the loop-prevention intent of wdReferencedDataChangedPredicate.
371-
func wdRefDataCondChanged(old, new *metav1.Condition) bool {
372-
if (old == nil) != (new == nil) {
373-
return true // condition was added or removed
374-
}
375-
if old == nil {
376-
return false // both nil — no change
377-
}
378-
return old.Status != new.Status ||
379-
old.Reason != new.Reason ||
380-
old.Message != new.Message
381-
}
382-
383319
// selectWDBlockingCondition evaluates all blocking causes for a WorkloadDeployment
384320
// that has no ready replicas and returns the Available condition reflecting the
385321
// highest-priority blocker. All causes are evaluated before selecting the winner
@@ -805,15 +741,15 @@ func (r *WorkloadDeploymentReconciler) SetupWithManager(mgr mcmanager.Manager, o
805741
}
806742

807743
b := mcbuilder.ControllerManagedBy(mgr).
808-
// The predicate gates re-enqueuing on meaningful WD changes: spec updates
809-
// (generation bump) or a ReferencedDataReady condition change written by
810-
// ReferencedDataController. Without it, each Status().Update by this
811-
// reconciler (writing Available/ReplicasReady) would re-enqueue itself,
812-
// creating a tight loop and delaying the ReferencedDataReady signal from
813-
// the resolver.
744+
// Watch all WorkloadDeployment events. The reconciler's own Status().Update
745+
// cannot create a self-trigger loop because the equality.Semantic.DeepEqual
746+
// guard skips the write when nothing changed, so no self-event is produced.
747+
// We deliberately do NOT filter Update events with a predicate: an earlier
748+
// predicate that only passed generation/ReferencedDataReady changes dropped
749+
// metadata-only updates such as the initial finalizer-add, which wedged new
750+
// WorkloadDeployments until a controller restart.
814751
For(&computev1alpha.WorkloadDeployment{},
815752
mcbuilder.WithEngageWithLocalCluster(false),
816-
mcbuilder.WithPredicates(wdReferencedDataChangedPredicate()),
817753
).
818754
Owns(&computev1alpha.Instance{})
819755

0 commit comments

Comments
 (0)