Skip to content

Commit 0e8e58c

Browse files
committed
Gate compute-service deletion on agent pods being evicted
A running nova-compute pod would re-register the service immediately after deletion, undoing the offboarding. Introduce AgentPodsEvicted condition computed by HypervisorController (only once Evicting=False) and block service deletion in the offboarding controller until it is True. Poll with RequeueAfter since pods are not watched.
1 parent 66a321a commit 0e8e58c

7 files changed

Lines changed: 340 additions & 2 deletions

File tree

api/v1/hypervisor_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ const (
7171

7272
// ConditionTypeAggregatesUpdated is the type of condition for aggregates updated status of a hypervisor
7373
ConditionTypeAggregatesUpdated = "AggregatesUpdated"
74+
75+
// ConditionTypeAgentPodsEvicted gates compute-service deletion during
76+
// offboarding: a running nova-compute pod would otherwise re-register
77+
// the service we are about to delete.
78+
ConditionTypeAgentPodsEvicted = "AgentPodsEvicted"
7479
)
7580

7681
// Condition Reasons

charts/openstack-hypervisor-operator/templates/role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ rules:
2020
- nodes/status
2121
verbs:
2222
- get
23+
- apiGroups:
24+
- ""
25+
resources:
26+
- pods
27+
verbs:
28+
- get
29+
- list
30+
- watch
2331
- apiGroups:
2432
- apps
2533
resources:

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ require (
4343
github.com/felixge/httpsnoop v1.0.4 // indirect
4444
github.com/fsnotify/fsnotify v1.9.0 // indirect
4545
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
46-
github.com/go-logr/logr v1.4.3 // indirect
46+
github.com/go-logr/logr v1.4.3
4747
github.com/go-logr/stdr v1.2.2 // indirect
4848
github.com/go-logr/zapr v1.3.0 // indirect
4949
github.com/go-openapi/jsonpointer v0.22.4 // indirect

internal/controller/hypervisor_controller.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,16 @@ import (
2121
"context"
2222
"fmt"
2323
"slices"
24+
"sort"
2425
"strings"
2526

27+
"github.com/go-logr/logr"
2628
corev1 "k8s.io/api/core/v1"
2729
"k8s.io/apimachinery/pkg/api/equality"
2830
"k8s.io/apimachinery/pkg/api/errors"
2931
"k8s.io/apimachinery/pkg/api/meta"
3032
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
33+
"k8s.io/apimachinery/pkg/fields"
3134
"k8s.io/apimachinery/pkg/labels"
3235
"k8s.io/apimachinery/pkg/runtime"
3336
ctrl "sigs.k8s.io/controller-runtime"
@@ -69,6 +72,7 @@ type HypervisorController struct {
6972

7073
// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch
7174
// +kubebuilder:rbac:groups="",resources=nodes/status,verbs=get
75+
// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
7276
// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch;create;update;patch;delete
7377
// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;update;patch
7478

@@ -123,16 +127,40 @@ func (hv *HypervisorController) Reconcile(ctx context.Context, req ctrl.Request)
123127
})
124128
}
125129

130+
// Only evaluate after VM eviction; a spurious True on a fresh node
131+
// (agents not yet scheduled) would be misleading.
132+
if hypervisor.Spec.Maintenance == kvmv1.MaintenanceTermination &&
133+
meta.IsStatusConditionFalse(hypervisor.Status.Conditions, kvmv1.ConditionTypeEvicting) {
134+
cond, err := hv.computeAgentPodsEvictedCondition(ctx, log, node.Name)
135+
if err != nil {
136+
return ctrl.Result{}, fmt.Errorf("failed to compute %s condition: %w", kvmv1.ConditionTypeAgentPodsEvicted, err)
137+
}
138+
meta.SetStatusCondition(&hypervisor.Status.Conditions, cond)
139+
if cond.Status == metav1.ConditionFalse {
140+
// No pod watch — rely on periodic requeue.
141+
if err := utils.PatchHypervisorStatusWithRetry(ctx, hv.Client, hypervisor.Name, HypervisorControllerName, func(h *kvmv1.Hypervisor) {
142+
meta.SetStatusCondition(&h.Status.Conditions, cond)
143+
}); err != nil {
144+
return ctrl.Result{}, err
145+
}
146+
return ctrl.Result{RequeueAfter: defaultPollTime}, nil
147+
}
148+
}
149+
126150
if !equality.Semantic.DeepEqual(hypervisor, base) {
127151
// Capture values to apply - only mutate fields this controller owns
128152
newInternalIP := hypervisor.Status.InternalIP
129153
terminatingCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTerminating)
154+
agentPodsCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeAgentPodsEvicted)
130155

131156
return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, hv.Client, hypervisor.Name, HypervisorControllerName, func(h *kvmv1.Hypervisor) {
132157
h.Status.InternalIP = newInternalIP
133158
if terminatingCondition != nil {
134159
meta.SetStatusCondition(&h.Status.Conditions, *terminatingCondition)
135160
}
161+
if agentPodsCondition != nil {
162+
meta.SetStatusCondition(&h.Status.Conditions, *agentPodsCondition)
163+
}
136164
})
137165
}
138166

@@ -262,3 +290,68 @@ func transportLabels(source, destination *metav1.ObjectMeta) {
262290
}
263291
}
264292
}
293+
294+
// computeAgentPodsEvictedCondition checks whether pods that would be evicted
295+
// by the offboarding taint are still running. Only call once Evicting=False.
296+
func (hv *HypervisorController) computeAgentPodsEvictedCondition(ctx context.Context, log logr.Logger, nodeName string) (metav1.Condition, error) {
297+
pods := &corev1.PodList{}
298+
if err := hv.List(ctx, pods, k8sclient.MatchingFieldsSelector{
299+
Selector: fields.OneTermEqualSelector("spec.nodeName", nodeName),
300+
}); err != nil {
301+
return metav1.Condition{}, fmt.Errorf("failed to list pods on node %s: %w", nodeName, err)
302+
}
303+
304+
offboardingTaint := corev1.Taint{
305+
Key: taintKeyOffboarding,
306+
Effect: corev1.TaintEffectNoExecute,
307+
}
308+
309+
var agentPods []string
310+
for _, pod := range pods.Items {
311+
if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
312+
continue
313+
}
314+
// Deletion in progress; don't wait for API object removal
315+
// (may be blocked on unrelated finalizers).
316+
if pod.DeletionTimestamp != nil {
317+
continue
318+
}
319+
if podToleratesTaint(log, &pod, &offboardingTaint) {
320+
continue
321+
}
322+
agentPods = append(agentPods, pod.Namespace+"/"+pod.Name)
323+
}
324+
325+
if len(agentPods) == 0 {
326+
return metav1.Condition{
327+
Type: kvmv1.ConditionTypeAgentPodsEvicted,
328+
Status: metav1.ConditionTrue,
329+
Reason: "NoAgentPods",
330+
Message: "No agent pods are running on this node",
331+
}, nil
332+
}
333+
334+
sort.Strings(agentPods)
335+
return metav1.Condition{
336+
Type: kvmv1.ConditionTypeAgentPodsEvicted,
337+
Status: metav1.ConditionFalse,
338+
Reason: "AgentPodsRunning",
339+
Message: fmt.Sprintf("%d agent pod(s) still running on node: %s", len(agentPods), strings.Join(agentPods, ", ")),
340+
}, nil
341+
}
342+
343+
// podToleratesTaint reports whether the pod tolerates the taint indefinitely.
344+
// Tolerations with a finite TolerationSeconds are excluded: the pod will
345+
// eventually be evicted and must not be treated as safe to ignore.
346+
func podToleratesTaint(log logr.Logger, pod *corev1.Pod, taint *corev1.Taint) bool {
347+
for i := range pod.Spec.Tolerations {
348+
t := &pod.Spec.Tolerations[i]
349+
if t.TolerationSeconds != nil {
350+
continue
351+
}
352+
if t.ToleratesTaint(log, taint, false) {
353+
return true
354+
}
355+
}
356+
return false
357+
}

internal/controller/hypervisor_controller_test.go

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,13 @@ limitations under the License.
1818
package controller
1919

2020
import (
21+
"fmt"
22+
"sync/atomic"
23+
2124
. "github.com/onsi/ginkgo/v2"
2225
. "github.com/onsi/gomega"
2326
corev1 "k8s.io/api/core/v1"
27+
"k8s.io/apimachinery/pkg/api/meta"
2428
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2529
"k8s.io/apimachinery/pkg/types"
2630
ctrl "sigs.k8s.io/controller-runtime"
@@ -29,6 +33,10 @@ import (
2933
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
3034
)
3135

36+
// envtest does not actually GC pods when their namespace is deleted, so
37+
// each spec gets a fresh namespace via this counter to keep them isolated.
38+
var agentNamespaceCounter atomic.Uint64
39+
3240
var _ = Describe("Hypervisor Controller", func() {
3341
const (
3442
resourceName = "other-node"
@@ -402,4 +410,164 @@ var _ = Describe("Hypervisor Controller", func() {
402410
Expect(hypervisor.Status.InternalIP).To(Equal("192.168.1.100"))
403411
})
404412
})
413+
414+
Context("AgentPodsEvicted condition", func() {
415+
var agentNamespace string
416+
417+
BeforeEach(func(ctx SpecContext) {
418+
agentNamespace = fmt.Sprintf("agent-ns-%d", agentNamespaceCounter.Add(1))
419+
ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: agentNamespace}}
420+
Expect(client.IgnoreAlreadyExists(k8sClient.Create(ctx, ns))).To(Succeed())
421+
DeferCleanup(func(ctx SpecContext) {
422+
Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, ns))).To(Succeed())
423+
})
424+
425+
// The condition is only computed during termination.
426+
_, err := hypervisorController.Reconcile(ctx, ctrl.Request{
427+
NamespacedName: types.NamespacedName{Name: resource.Name},
428+
})
429+
Expect(err).NotTo(HaveOccurred())
430+
431+
hypervisor := &kvmv1.Hypervisor{}
432+
Expect(k8sClient.Get(ctx, hypervisorName, hypervisor)).To(Succeed())
433+
hypervisor.Spec.Maintenance = kvmv1.MaintenanceTermination
434+
hypervisor.Spec.LifecycleEnabled = true
435+
Expect(k8sClient.Update(ctx, hypervisor)).To(Succeed())
436+
437+
// Default for these specs: VM eviction is done. Subcontexts
438+
// that exercise the "not yet done" path override this.
439+
Expect(k8sClient.Get(ctx, hypervisorName, hypervisor)).To(Succeed())
440+
meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
441+
Type: kvmv1.ConditionTypeEvicting,
442+
Status: metav1.ConditionFalse,
443+
Reason: "Succeeded",
444+
Message: "All VMs evicted",
445+
})
446+
Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
447+
})
448+
449+
createPod := func(ctx SpecContext, name, namespace string, phase corev1.PodPhase, tolerations ...corev1.Toleration) *corev1.Pod {
450+
pod := &corev1.Pod{
451+
ObjectMeta: metav1.ObjectMeta{
452+
Name: name,
453+
Namespace: namespace,
454+
},
455+
Spec: corev1.PodSpec{
456+
NodeName: resource.Name,
457+
Containers: []corev1.Container{
458+
{Name: "main", Image: "registry.example.com/whatever:latest"},
459+
},
460+
Tolerations: tolerations,
461+
},
462+
Status: corev1.PodStatus{Phase: phase},
463+
}
464+
Expect(k8sClient.Create(ctx, pod)).To(Succeed())
465+
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, pod)).To(Succeed())
466+
pod.Status.Phase = phase
467+
Expect(k8sClient.Status().Update(ctx, pod)).To(Succeed())
468+
DeferCleanup(func(ctx SpecContext) {
469+
Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, pod))).To(Succeed())
470+
})
471+
return pod
472+
}
473+
474+
When("only pods that tolerate the offboarding taint are running", func() {
475+
BeforeEach(func(ctx SpecContext) {
476+
createPod(ctx, "tolerator", agentNamespace, corev1.PodRunning, corev1.Toleration{
477+
Key: taintKeyOffboarding,
478+
Operator: corev1.TolerationOpExists,
479+
Effect: corev1.TaintEffectNoExecute,
480+
})
481+
// Phase=Succeeded must not count regardless of tolerations.
482+
createPod(ctx, "old-job", agentNamespace, corev1.PodSucceeded)
483+
})
484+
485+
It("should set AgentPodsEvicted=True without requeue", func(ctx SpecContext) {
486+
result, err := hypervisorController.Reconcile(ctx, ctrl.Request{
487+
NamespacedName: types.NamespacedName{Name: resource.Name},
488+
})
489+
Expect(err).NotTo(HaveOccurred())
490+
Expect(result.RequeueAfter).To(BeZero())
491+
492+
hypervisor := &kvmv1.Hypervisor{}
493+
Expect(k8sClient.Get(ctx, hypervisorName, hypervisor)).To(Succeed())
494+
Expect(hypervisor.Status.Conditions).To(ContainElement(
495+
SatisfyAll(
496+
HaveField("Type", kvmv1.ConditionTypeAgentPodsEvicted),
497+
HaveField("Status", metav1.ConditionTrue),
498+
HaveField("Reason", "NoAgentPods"),
499+
),
500+
))
501+
})
502+
})
503+
504+
When("an agent pod is running on the node and VM eviction is done", func() {
505+
BeforeEach(func(ctx SpecContext) {
506+
createPod(ctx, "nova-compute-xyz", agentNamespace, corev1.PodRunning)
507+
})
508+
509+
It("should set AgentPodsEvicted=False with reason AgentPodsRunning and requeue", func(ctx SpecContext) {
510+
result, err := hypervisorController.Reconcile(ctx, ctrl.Request{
511+
NamespacedName: types.NamespacedName{Name: resource.Name},
512+
})
513+
Expect(err).NotTo(HaveOccurred())
514+
Expect(result.RequeueAfter).To(Equal(defaultPollTime))
515+
516+
hypervisor := &kvmv1.Hypervisor{}
517+
Expect(k8sClient.Get(ctx, hypervisorName, hypervisor)).To(Succeed())
518+
Expect(hypervisor.Status.Conditions).To(ContainElement(
519+
SatisfyAll(
520+
HaveField("Type", kvmv1.ConditionTypeAgentPodsEvicted),
521+
HaveField("Status", metav1.ConditionFalse),
522+
HaveField("Reason", "AgentPodsRunning"),
523+
HaveField("Message", ContainSubstring("nova-compute-xyz")),
524+
),
525+
))
526+
})
527+
})
528+
529+
When("the only non-tolerating pod is already being deleted", func() {
530+
// A finalizer keeps the API object around with DeletionTimestamp
531+
// set, simulating a pod whose containers are shutting down but
532+
// whose deletion is blocked on some unrelated finalizer.
533+
const finalizer = "test.kvm.cloud.sap/keep-alive"
534+
535+
BeforeEach(func(ctx SpecContext) {
536+
pod := createPod(ctx, "nova-compute-deleting", agentNamespace, corev1.PodRunning)
537+
pod.Finalizers = []string{finalizer}
538+
Expect(k8sClient.Update(ctx, pod)).To(Succeed())
539+
Expect(k8sClient.Delete(ctx, pod)).To(Succeed())
540+
541+
DeferCleanup(func(ctx SpecContext) {
542+
fresh := &corev1.Pod{}
543+
if err := k8sClient.Get(ctx, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, fresh); err == nil {
544+
fresh.Finalizers = nil
545+
Expect(client.IgnoreNotFound(k8sClient.Update(ctx, fresh))).To(Succeed())
546+
}
547+
})
548+
549+
// Sanity: the pod must still exist with DeletionTimestamp.
550+
fresh := &corev1.Pod{}
551+
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, fresh)).To(Succeed())
552+
Expect(fresh.DeletionTimestamp).NotTo(BeNil())
553+
})
554+
555+
It("should set AgentPodsEvicted=True (deletion-pending pod is treated as gone)", func(ctx SpecContext) {
556+
_, err := hypervisorController.Reconcile(ctx, ctrl.Request{
557+
NamespacedName: types.NamespacedName{Name: resource.Name},
558+
})
559+
Expect(err).NotTo(HaveOccurred())
560+
561+
hypervisor := &kvmv1.Hypervisor{}
562+
Expect(k8sClient.Get(ctx, hypervisorName, hypervisor)).To(Succeed())
563+
Expect(hypervisor.Status.Conditions).To(ContainElement(
564+
SatisfyAll(
565+
HaveField("Type", kvmv1.ConditionTypeAgentPodsEvicted),
566+
HaveField("Status", metav1.ConditionTrue),
567+
HaveField("Reason", "NoAgentPods"),
568+
),
569+
))
570+
})
571+
})
572+
})
405573
})

internal/controller/offboarding_controller.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,16 @@ func (r *HypervisorOffboardingReconciler) Reconcile(ctx context.Context, req ctr
127127
return r.setOffboardingCondition(ctx, hv, msg)
128128
}
129129

130-
// Deleting and evicted, so better delete the service
130+
// A still-running nova-compute agent would re-register the service we
131+
// are about to delete, undoing the offboarding.
132+
if !meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeAgentPodsEvicted) {
133+
msg := "Waiting for agent pods (nova-compute, neutron) to be evicted from node"
134+
if cond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeAgentPodsEvicted); cond != nil && cond.Message != "" {
135+
msg = "Waiting for agent pods to be evicted: " + cond.Message
136+
}
137+
return r.setOffboardingCondition(ctx, hv, msg)
138+
}
139+
131140
err = services.Delete(ctx, r.computeClient, hypervisor.Service.ID).ExtractErr()
132141
if err != nil && !gophercloud.ResponseCodeIs(err, http.StatusNotFound) {
133142
msg := fmt.Sprintf("cannot delete service %s due to %v", hypervisor.Service.ID, err)

0 commit comments

Comments
 (0)