88 "fmt"
99 "slices"
1010
11+ "k8s.io/apimachinery/pkg/api/equality"
1112 apierrors "k8s.io/apimachinery/pkg/api/errors"
1213 apimeta "k8s.io/apimachinery/pkg/api/meta"
1314 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -17,9 +18,11 @@ import (
1718 "sigs.k8s.io/controller-runtime/pkg/client"
1819 "sigs.k8s.io/controller-runtime/pkg/cluster"
1920 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
21+ "sigs.k8s.io/controller-runtime/pkg/event"
2022 "sigs.k8s.io/controller-runtime/pkg/finalizer"
2123 "sigs.k8s.io/controller-runtime/pkg/handler"
2224 "sigs.k8s.io/controller-runtime/pkg/log"
25+ "sigs.k8s.io/controller-runtime/pkg/predicate"
2326 "sigs.k8s.io/controller-runtime/pkg/reconcile"
2427 mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder"
2528 mccontext "sigs.k8s.io/multicluster-runtime/pkg/context"
@@ -34,6 +37,13 @@ import (
3437 instancecontrolstateful "go.datum.net/compute/internal/controller/instancecontrol/stateful"
3538)
3639
40+ const (
41+ // reasonReplicasAvailable is used in the ReplicasReady condition when replicas
42+ // are either available or pending; it has no equivalent in the API constants
43+ // package because it is an internal detail of this controller.
44+ reasonReplicasAvailable = "ReplicasAvailable"
45+ )
46+
3747// WorkloadDeploymentReconciler reconciles a WorkloadDeployment object
3848type WorkloadDeploymentReconciler struct {
3949 mgr mcmanager.Manager
@@ -107,6 +117,10 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco
107117 logger .Info ("reconciling deployment" )
108118 defer logger .Info ("reconcile complete" )
109119
120+ // Snapshot the existing status before any modifications so we can skip the
121+ // Status().Update call when nothing changed (see loop-prevention comment below).
122+ existingStatus := * deployment .Status .DeepCopy ()
123+
110124 // Collect all instances for this deployment
111125 listOpts := client.MatchingLabels {
112126 computev1alpha .WorkloadDeploymentUIDLabel : string (deployment .GetUID ()),
@@ -177,7 +191,7 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco
177191 desiredReplicas = 0
178192 }
179193
180- currentReplicas , updatedReplicas , readyReplicas , quotaBlockedReplicas , err := r .reconcileInstanceGates (ctx , cl .GetClient (), & deployment , instances .Items , networkReady )
194+ currentReplicas , updatedReplicas , readyReplicas , quotaBlockedReplicas , referencedDataBlockedReplicas , err := r .reconcileInstanceGates (ctx , cl .GetClient (), & deployment , instances .Items , networkReady )
181195 if err != nil {
182196 return ctrl.Result {}, err
183197 }
@@ -189,18 +203,26 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco
189203 deployment .Status .ReadyReplicas = int32 (readyReplicas )
190204 deployment .Status .ObservedGeneration = deployment .Generation
191205
192- if quotaBlockedReplicas > 0 {
206+ switch {
207+ case quotaBlockedReplicas > 0 :
193208 apimeta .SetStatusCondition (& deployment .Status .Conditions , metav1.Condition {
194209 Type : computev1alpha .WorkloadDeploymentReplicasReady ,
195210 Status : metav1 .ConditionFalse ,
196211 Reason : computev1alpha .InstanceQuotaGrantedReasonQuotaExceeded ,
197212 Message : fmt .Sprintf ("%d of %d desired replicas are pending quota" , quotaBlockedReplicas , desiredReplicas ),
198213 })
199- } else {
214+ case referencedDataBlockedReplicas > 0 :
215+ apimeta .SetStatusCondition (& deployment .Status .Conditions , metav1.Condition {
216+ Type : computev1alpha .WorkloadDeploymentReplicasReady ,
217+ Status : metav1 .ConditionFalse ,
218+ Reason : computev1alpha .ReferencedDataReasonAwaitingPropagation ,
219+ Message : fmt .Sprintf ("%d of %d desired replicas are waiting for referenced data companions" , referencedDataBlockedReplicas , desiredReplicas ),
220+ })
221+ default :
200222 apimeta .SetStatusCondition (& deployment .Status .Conditions , metav1.Condition {
201223 Type : computev1alpha .WorkloadDeploymentReplicasReady ,
202224 Status : metav1 .ConditionTrue ,
203- Reason : "ReplicasAvailable" ,
225+ Reason : reasonReplicasAvailable ,
204226 Message : fmt .Sprintf ("%d/%d replicas available" , readyReplicas , desiredReplicas ),
205227 })
206228 }
@@ -239,12 +261,19 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco
239261 })
240262 }
241263
242- if err := cl .GetClient ().Status ().Update (ctx , & deployment ); err != nil {
243- return ctrl.Result {}, fmt .Errorf ("failed updating deployment status: %w" , err )
264+ // Skip the write when the status is unchanged. Without this guard the
265+ // reconciler's own Status().Update would always produce a new resourceVersion,
266+ // firing another Update event on the WD and creating an infinite reconcile loop
267+ // before the predicate on For() was added. The guard is a belt-and-suspenders
268+ // complement to the predicate: the predicate prevents re-queuing on own writes,
269+ // and this guard avoids the superfluous API call entirely.
270+ if ! equality .Semantic .DeepEqual (existingStatus , deployment .Status ) {
271+ if err := cl .GetClient ().Status ().Update (ctx , & deployment ); err != nil {
272+ return ctrl.Result {}, fmt .Errorf ("failed updating deployment status: %w" , err )
273+ }
274+ logger .Info ("deployment status updated" )
244275 }
245276
246- logger .Info ("deployment status updated" )
247-
248277 return ctrl.Result {}, nil
249278}
250279
@@ -254,13 +283,17 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates(
254283 deployment * computev1alpha.WorkloadDeployment ,
255284 instances []computev1alpha.Instance ,
256285 networkReady bool ,
257- ) (currentReplicas , updatedReplicas , readyReplicas , quotaBlockedReplicas int , err error ) {
286+ ) (currentReplicas , updatedReplicas , readyReplicas , quotaBlockedReplicas , referencedDataBlockedReplicas int , err error ) {
258287 templateHash := instancecontrol .ComputeHash (deployment .Spec .Template )
259288 for _ , instance := range instances {
260289 if apimeta .IsStatusConditionPresentAndEqual (instance .Status .Conditions , computev1alpha .InstanceQuotaGranted , metav1 .ConditionFalse ) {
261290 quotaBlockedReplicas ++
262291 }
263292
293+ if apimeta .IsStatusConditionPresentAndEqual (instance .Status .Conditions , computev1alpha .ReferencedDataReady , metav1 .ConditionFalse ) {
294+ referencedDataBlockedReplicas ++
295+ }
296+
264297 // Spec.Controller is a nilable pointer; guard it before dereferencing the
265298 // scheduling gates so an instance without controller state cannot panic
266299 // the reconcile (mirrors the Status.Controller guard below).
@@ -273,7 +306,7 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates(
273306 instance .Spec .Controller .SchedulingGates = newGates
274307 return nil
275308 }); patchErr != nil {
276- return 0 , 0 , 0 , 0 , fmt .Errorf ("failed updating instance: %w" , patchErr )
309+ return 0 , 0 , 0 , 0 , 0 , fmt .Errorf ("failed updating instance: %w" , patchErr )
277310 }
278311 }
279312 }
@@ -300,7 +333,69 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates(
300333 readyReplicas ++
301334 }
302335 }
303- return currentReplicas , updatedReplicas , readyReplicas , quotaBlockedReplicas , nil
336+ return currentReplicas , updatedReplicas , readyReplicas , quotaBlockedReplicas , referencedDataBlockedReplicas , nil
337+ }
338+
339+ // wdReferencedDataChangedPredicate returns a predicate for the WorkloadDeployment
340+ // For() watch that fires on:
341+ // - Any Create, Delete, or Generic event (always enqueue).
342+ // - An Update event where metadata.generation changed (spec updated), OR where
343+ // the ReferencedDataReady condition's Status, Reason, or Message changed.
344+ //
345+ // The predicate intentionally does NOT fire when only the Available or
346+ // ReplicasReady conditions change, because those are written by this reconciler
347+ // itself. Without this guard the reconciler's own Status().Update would re-enqueue
348+ // itself on every run, creating a tight reconcile loop. The equality check before
349+ // Status().Update is a complementary guard, but the predicate is the primary
350+ // protection: it prevents re-enqueuing entirely so the workqueue stays quiet between
351+ // meaningful state transitions.
352+ //
353+ // Loop prevention: the ReferencedDataController (the only other writer of the
354+ // ReferencedDataReady condition) is the intended trigger. When it sets
355+ // ReferencedDataReady=False/SourceNotFound the predicate passes and this
356+ // reconciler re-runs, sees the resolver verdict in deployment.Status.Conditions, and
357+ // promotes Available to ReferencedDataNotReady. Subsequent runs by this reconciler
358+ // (which write Available but not ReferencedDataReady) are filtered out.
359+ func wdReferencedDataChangedPredicate () predicate.Predicate {
360+ return predicate.Funcs {
361+ UpdateFunc : func (e event.UpdateEvent ) bool {
362+ oldWD , ok1 := e .ObjectOld .(* computev1alpha.WorkloadDeployment )
363+ newWD , ok2 := e .ObjectNew .(* computev1alpha.WorkloadDeployment )
364+ if ! ok1 || ! ok2 {
365+ return true // be conservative when type assertion fails
366+ }
367+ // Spec change: always reconcile.
368+ if oldWD .Generation != newWD .Generation {
369+ return true
370+ }
371+ // ReferencedDataReady condition changed: reconcile so Available is
372+ // updated to reflect the resolver's verdict.
373+ return wdRefDataCondChanged (
374+ apimeta .FindStatusCondition (oldWD .Status .Conditions , computev1alpha .ReferencedDataReady ),
375+ apimeta .FindStatusCondition (newWD .Status .Conditions , computev1alpha .ReferencedDataReady ),
376+ )
377+ },
378+ CreateFunc : func (_ event.CreateEvent ) bool { return true },
379+ DeleteFunc : func (_ event.DeleteEvent ) bool { return true },
380+ GenericFunc : func (_ event.GenericEvent ) bool { return true },
381+ }
382+ }
383+
384+ // wdRefDataCondChanged returns true when the ReferencedDataReady condition's
385+ // observable fields (Status, Reason, Message) differ between old and new. Presence
386+ // changes (nil → non-nil or vice versa) are also treated as a change. The
387+ // LastTransitionTime field is excluded because it changes on every status flip and
388+ // would defeat the loop-prevention intent of wdReferencedDataChangedPredicate.
389+ func wdRefDataCondChanged (old , new * metav1.Condition ) bool {
390+ if (old == nil ) != (new == nil ) {
391+ return true // condition was added or removed
392+ }
393+ if old == nil {
394+ return false // both nil — no change
395+ }
396+ return old .Status != new .Status ||
397+ old .Reason != new .Reason ||
398+ old .Message != new .Message
304399}
305400
306401// reconcileNetworks ensures NetworkBindings and SubnetClaims exist for all
@@ -573,7 +668,16 @@ func (r *WorkloadDeploymentReconciler) SetupWithManager(mgr mcmanager.Manager, o
573668 }
574669
575670 b := mcbuilder .ControllerManagedBy (mgr ).
576- For (& computev1alpha.WorkloadDeployment {}, mcbuilder .WithEngageWithLocalCluster (false )).
671+ // The predicate gates re-enqueuing on meaningful WD changes: spec updates
672+ // (generation bump) or a ReferencedDataReady condition change written by
673+ // ReferencedDataController. Without it, each Status().Update by this
674+ // reconciler (writing Available/ReplicasReady) would re-enqueue itself,
675+ // creating a tight loop and delaying the ReferencedDataReady signal from
676+ // the resolver.
677+ For (& computev1alpha.WorkloadDeployment {},
678+ mcbuilder .WithEngageWithLocalCluster (false ),
679+ mcbuilder .WithPredicates (wdReferencedDataChangedPredicate ()),
680+ ).
577681 Owns (& computev1alpha.Instance {})
578682
579683 // Only watch networking resources when the networking integration is enabled.
0 commit comments