@@ -21,13 +21,17 @@ import (
2121 "context"
2222 "fmt"
2323 "slices"
24+ "sort"
2425 "strings"
26+ "time"
2527
28+ "github.com/go-logr/logr"
2629 corev1 "k8s.io/api/core/v1"
2730 "k8s.io/apimachinery/pkg/api/equality"
2831 "k8s.io/apimachinery/pkg/api/errors"
2932 "k8s.io/apimachinery/pkg/api/meta"
3033 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34+ "k8s.io/apimachinery/pkg/fields"
3135 "k8s.io/apimachinery/pkg/labels"
3236 "k8s.io/apimachinery/pkg/runtime"
3337 ctrl "sigs.k8s.io/controller-runtime"
@@ -69,6 +73,7 @@ type HypervisorController struct {
6973
7074// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch
7175// +kubebuilder:rbac:groups="",resources=nodes/status,verbs=get
76+ // +kubebuilder:rbac:groups="",resources=pods,verbs=list
7277// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch;create;update;patch;delete
7378// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;update;patch
7479
@@ -100,8 +105,22 @@ func (hv *HypervisorController) Reconcile(ctx context.Context, req ctrl.Request)
100105 }
101106 // continue with creation
102107 } else {
103- // update Status if needed
104- base := hypervisor .DeepCopy ()
108+ // First, propagate spec/metadata derived from the Node (labels,
109+ // annotations -> aggregates/traits, lifecycle). This must run on
110+ // every reconcile, including those where status will also change
111+ // (e.g. AgentPodsEvicted=False during termination); otherwise the
112+ // Hypervisor spec/labels go stale.
113+ specBase := hypervisor .DeepCopy ()
114+ syncLabelsAndAnnotations (nodeLabels , hypervisor , node )
115+ if ! equality .Semantic .DeepEqual (hypervisor , specBase ) {
116+ if err := hv .Patch (ctx , hypervisor , k8sclient .MergeFromWithOptions (specBase ,
117+ k8sclient.MergeFromWithOptimisticLock {}), k8sclient .FieldOwner (HypervisorControllerName )); err != nil {
118+ return ctrl.Result {}, err
119+ }
120+ }
121+
122+ // Then, compute and persist any status changes derived from the Node.
123+ statusBase := hypervisor .DeepCopy ()
105124
106125 // transfer internal IP
107126 for _ , address := range node .Status .Addresses {
@@ -123,26 +142,43 @@ func (hv *HypervisorController) Reconcile(ctx context.Context, req ctrl.Request)
123142 })
124143 }
125144
126- if ! equality .Semantic .DeepEqual (hypervisor , base ) {
145+ // Only evaluate after VM eviction; a spurious True on a fresh node
146+ // (agents not yet scheduled) would be misleading.
147+ var statusRequeueAfter time.Duration
148+ if hypervisor .Spec .Maintenance == kvmv1 .MaintenanceTermination &&
149+ meta .IsStatusConditionFalse (hypervisor .Status .Conditions , kvmv1 .ConditionTypeEvicting ) &&
150+ nodeHasOffboardingTaint (node ) {
151+ cond , err := hv .computeAgentPodsEvictedCondition (ctx , log , node .Name )
152+ if err != nil {
153+ return ctrl.Result {}, fmt .Errorf ("failed to compute %s condition: %w" , kvmv1 .ConditionTypeAgentPodsEvicted , err )
154+ }
155+ meta .SetStatusCondition (& hypervisor .Status .Conditions , cond )
156+ if cond .Status == metav1 .ConditionFalse {
157+ // No pod watch — rely on periodic requeue.
158+ statusRequeueAfter = defaultPollTime
159+ }
160+ }
161+
162+ if ! equality .Semantic .DeepEqual (hypervisor , statusBase ) {
127163 // Capture values to apply - only mutate fields this controller owns
128164 newInternalIP := hypervisor .Status .InternalIP
129165 terminatingCondition := meta .FindStatusCondition (hypervisor .Status .Conditions , kvmv1 .ConditionTypeTerminating )
166+ agentPodsCondition := meta .FindStatusCondition (hypervisor .Status .Conditions , kvmv1 .ConditionTypeAgentPodsEvicted )
130167
131- return ctrl. Result {}, utils .PatchHypervisorStatusWithRetry (ctx , hv .Client , hypervisor .Name , HypervisorControllerName , func (h * kvmv1.Hypervisor ) {
168+ if err := utils .PatchHypervisorStatusWithRetry (ctx , hv .Client , hypervisor .Name , HypervisorControllerName , func (h * kvmv1.Hypervisor ) {
132169 h .Status .InternalIP = newInternalIP
133170 if terminatingCondition != nil {
134171 meta .SetStatusCondition (& h .Status .Conditions , * terminatingCondition )
135172 }
136- })
137- }
138-
139- syncLabelsAndAnnotations ( nodeLabels , hypervisor , node )
140- if equality . Semantic . DeepEqual ( hypervisor , base ) {
141- return ctrl. Result {}, nil
173+ if agentPodsCondition != nil {
174+ meta . SetStatusCondition ( & h . Status . Conditions , * agentPodsCondition )
175+ }
176+ }); err != nil {
177+ return ctrl. Result {}, err
178+ }
142179 }
143180
144- return ctrl.Result {}, hv .Patch (ctx , hypervisor , k8sclient .MergeFromWithOptions (base ,
145- k8sclient.MergeFromWithOptimisticLock {}), k8sclient .FieldOwner (HypervisorControllerName ))
181+ return ctrl.Result {RequeueAfter : statusRequeueAfter }, nil
146182 }
147183
148184 syncLabelsAndAnnotations (nodeLabels , hypervisor , node )
@@ -262,3 +298,89 @@ func transportLabels(source, destination *metav1.ObjectMeta) {
262298 }
263299 }
264300}
301+
302+ // nodeHasOffboardingTaint reports whether the offboarding NoExecute taint has
303+ // been applied to the node. The pod list is only meaningful after that point —
304+ // before it, no agents have been evicted yet.
305+ func nodeHasOffboardingTaint (node * corev1.Node ) bool {
306+ for _ , t := range node .Spec .Taints {
307+ if t .Key == taintKeyOffboarding && t .Effect == corev1 .TaintEffectNoExecute {
308+ return true
309+ }
310+ }
311+ return false
312+ }
313+
314+ // computeAgentPodsEvictedCondition checks whether pods that would be evicted
315+ // by the offboarding taint are still running. Only call once Evicting=False.
316+ func (hv * HypervisorController ) computeAgentPodsEvictedCondition (ctx context.Context , log logr.Logger , nodeName string ) (metav1.Condition , error ) {
317+ offboardingTaint := corev1.Taint {
318+ Key : taintKeyOffboarding ,
319+ Effect : corev1 .TaintEffectNoExecute ,
320+ }
321+
322+ var agentPods []string
323+ for _ , ns := range global .AgentNamespaces {
324+ var continueToken string
325+ for {
326+ pods := & corev1.PodList {}
327+ if err := hv .List (ctx , pods ,
328+ k8sclient .InNamespace (ns ),
329+ k8sclient.MatchingFieldsSelector {
330+ Selector : fields .OneTermEqualSelector ("spec.nodeName" , nodeName ),
331+ },
332+ & k8sclient.ListOptions {Limit : 100 , Continue : continueToken },
333+ ); err != nil {
334+ return metav1.Condition {}, fmt .Errorf ("failed to list pods on node %s in namespace %q: %w" , nodeName , ns , err )
335+ }
336+
337+ for _ , pod := range pods .Items {
338+ if pod .Status .Phase == corev1 .PodSucceeded || pod .Status .Phase == corev1 .PodFailed {
339+ continue
340+ }
341+ if podToleratesTaint (log , & pod , & offboardingTaint ) {
342+ continue
343+ }
344+ agentPods = append (agentPods , pod .Namespace + "/" + pod .Name )
345+ }
346+
347+ if pods .Continue == "" {
348+ break
349+ }
350+ continueToken = pods .Continue
351+ }
352+ }
353+
354+ if len (agentPods ) == 0 {
355+ return metav1.Condition {
356+ Type : kvmv1 .ConditionTypeAgentPodsEvicted ,
357+ Status : metav1 .ConditionTrue ,
358+ Reason : "NoAgentPods" ,
359+ Message : "No agent pods are running on this node" ,
360+ }, nil
361+ }
362+
363+ sort .Strings (agentPods )
364+ return metav1.Condition {
365+ Type : kvmv1 .ConditionTypeAgentPodsEvicted ,
366+ Status : metav1 .ConditionFalse ,
367+ Reason : "AgentPodsRunning" ,
368+ Message : fmt .Sprintf ("%d agent pod(s) still running on node: %s" , len (agentPods ), strings .Join (agentPods , ", " )),
369+ }, nil
370+ }
371+
372+ // podToleratesTaint reports whether the pod tolerates the taint indefinitely.
373+ // Tolerations with a finite TolerationSeconds are excluded: the pod will
374+ // eventually be evicted and must not be treated as safe to ignore.
375+ func podToleratesTaint (log logr.Logger , pod * corev1.Pod , taint * corev1.Taint ) bool {
376+ for i := range pod .Spec .Tolerations {
377+ t := & pod .Spec .Tolerations [i ]
378+ if t .TolerationSeconds != nil {
379+ continue
380+ }
381+ if t .ToleratesTaint (log , taint , false ) {
382+ return true
383+ }
384+ }
385+ return false
386+ }
0 commit comments