Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/v1/hypervisor_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ const (

// ConditionTypeAggregatesUpdated is the type of condition for aggregates updated status of a hypervisor
ConditionTypeAggregatesUpdated = "AggregatesUpdated"

// ConditionTypeAgentPodsEvicted gates compute-service deletion during
// offboarding: a running nova-compute pod would otherwise re-register
// the service we are about to delete.
ConditionTypeAgentPodsEvicted = "AgentPodsEvicted"
)

// Condition Reasons
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ spec:
value: {{ quote .Values.controllerManager.manager.env.certificateIssuerName }}
- name: LABEL_SELECTOR
value: {{ quote .Values.controllerManager.manager.env.labelSelector }}
- name: AGENT_NAMESPACES
value: {{ quote .Values.controllerManager.manager.env.agentNamespaces }}
- name: KUBERNETES_CLUSTER_DOMAIN
value: {{ quote .Values.kubernetesClusterDomain }}
image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Chart.AppVersion }}
Expand Down
6 changes: 6 additions & 0 deletions charts/openstack-hypervisor-operator/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ rules:
- nodes/status
verbs:
- get
- apiGroups:
- ""
resources:
- pods
verbs:
- list
- apiGroups:
- apps
resources:
Expand Down
2 changes: 2 additions & 0 deletions charts/openstack-hypervisor-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ controllerManager:
- --certificate-namespace=$(CERTIFICATE_NAMESPACE)
- --certificate-issuer-name=$(CERTIFICATE_ISSUER_NAME)
- --label-selector=$(LABEL_SELECTOR)
- --agent-namespaces=$(AGENT_NAMESPACES)
containerSecurityContext:
allowPrivilegeEscalation: false
capabilities:
Expand All @@ -16,6 +17,7 @@ controllerManager:
certificateIssuerName: nova-hypervisor-agents-ca-issuer
certificateNamespace: monsoon3
labelSelector: ""
agentNamespaces: "monsoon3"
osAuthUrl: ""
osProjectDomainName: ""
osProjectName: ""
Expand Down
37 changes: 37 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"fmt"
"os"
gruntime "runtime"
"strings"

"github.com/sapcc/go-api-declarations/bininfo"
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand Down Expand Up @@ -97,6 +98,10 @@ func main() {
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
flag.StringVar(&global.LabelSelector, "label-selector", "", "Label selector to filter watched resources (namely nodes).")

var agentNamespacesFlag string
flag.StringVar(&agentNamespacesFlag, "agent-namespaces", "",
"Comma-separated list of namespaces to search for agent pods (nova-compute, neutron) during offboarding.")

flag.StringVar(&certificateNamespace, "certificate-namespace", "monsoon3", "The namespace for the certificates. ")
flag.StringVar(&certificateIssuerName, "certificate-issuer-name", "nova-hypervisor-agents-ca-issuer",
"Name of the certificate issuer.")
Expand All @@ -111,13 +116,38 @@ func main() {
opts.BindFlags(flag.CommandLine)
flag.Parse()

// Handle --version before validating required runtime flags so that
// `./manager --version` works without providing --agent-namespaces.
if version {
fmt.Printf("%s %s (%s/%s) %s\n",
bininfo.Component(), bininfo.VersionOr("devel"), gruntime.GOOS, gruntime.GOARCH,
bininfo.CommitOr("edge"))
os.Exit(0)
}

if agentNamespacesFlag != "" {
// Deduplicate. Without this, repeated entries (e.g. from templated
// values) would cause redundant pod-list calls and inflate the
// AgentPodsEvicted condition message with duplicate pod names.
seen := map[string]struct{}{}
for ns := range strings.SplitSeq(agentNamespacesFlag, ",") {
ns = strings.TrimSpace(ns)
if ns == "" {
continue
}
if _, ok := seen[ns]; ok {
continue
}
seen[ns] = struct{}{}
global.AgentNamespaces = append(global.AgentNamespaces, ns)
}
}

if len(global.AgentNamespaces) == 0 {
setupLog.Error(errors.New("--agent-namespaces is required"), "invalid configuration")
os.Exit(1)
}
Comment thread
fwiesel marked this conversation as resolved.
Comment thread
fwiesel marked this conversation as resolved.

if certificateIssuerName == "" {
setupLog.Error(errors.New("certificate-issuer-name cannot be empty"), "invalid certificate issuer name")
os.Exit(1)
Expand Down Expand Up @@ -226,6 +256,13 @@ func main() {

// Optionally configure the cache to listen/watch for specific labeled resources only
Cache: cacheOptions,
// Pods are listed directly (not cached) to avoid a cluster-wide pod
// informer — a single large namespace would cause an OOM on startup.
Client: client.Options{
Cache: &client.CacheOptions{
DisableFor: []client.Object{&corev1.Pod{}},
},
},
})

if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ require (
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/logr v1.4.3
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.22.4 // indirect
Expand Down
5 changes: 5 additions & 0 deletions internal/controller/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,9 @@ package controller
const (
labelHypervisor = "nova.openstack.cloud.sap/virt-driver"
testAggregateName = "tenant_filter_tests"

// taintKeyOffboarding is used as a NoExecute taint. nova-compute and
// neutron agent pods do not tolerate it (the kvm-node-agent and the
// signalling pod do), so applying it forces those agents off the node.
taintKeyOffboarding = "kvm.cloud.sap/offboarding"
)
81 changes: 80 additions & 1 deletion internal/controller/gardener_node_lifecycle_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,13 @@ import (
v1 "k8s.io/client-go/applyconfigurations/meta/v1"
policyv1ac "k8s.io/client-go/applyconfigurations/policy/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
logger "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"

kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
)
Expand Down Expand Up @@ -85,6 +89,19 @@ func (r *GardenerNodeLifecycleController) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, nil
}

// Apply the offboarding taint once VMs are gone; gate on Evicting=False
// to avoid racing with live-migration.
if hv.Spec.Maintenance == kvmv1.MaintenanceTermination &&
meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) {
patched, err := r.ensureOffboardingTaint(ctx, node)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to ensure offboarding taint: %w", err)
}
if patched {
return ctrl.Result{}, nil
}
}

// We do not care about the particular value, as long as it isn't an error
var minAvailable int32 = 1

Expand Down Expand Up @@ -115,6 +132,30 @@ func (r *GardenerNodeLifecycleController) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, nil
}

// ensureOffboardingTaint adds the offboarding NoExecute taint if not already
// present. Returns true when a patch was issued (caller should return early).
func (r *GardenerNodeLifecycleController) ensureOffboardingTaint(ctx context.Context, node *corev1.Node) (bool, error) {
for _, t := range node.Spec.Taints {
if t.Key == taintKeyOffboarding && t.Effect == corev1.TaintEffectNoExecute {
return false, nil
}
}

log := logger.FromContext(ctx)
log.Info("Adding offboarding taint to node",
"node", node.Name,
"taint", taintKeyOffboarding,
"effect", corev1.TaintEffectNoExecute)

// StrategicMergeFrom merges taints by key, preserving concurrent additions.
patch := k8sclient.StrategicMergeFrom(node.DeepCopy())
node.Spec.Taints = append(node.Spec.Taints, corev1.Taint{
Key: taintKeyOffboarding,
Effect: corev1.TaintEffectNoExecute,
})
return true, r.Patch(ctx, node, patch, k8sclient.FieldOwner(MaintenanceControllerName))
}

func (r *GardenerNodeLifecycleController) ensureBlockingPodDisruptionBudget(ctx context.Context, node *corev1.Node, minAvailable int32) error {
name := nameForNode(node)
nodeLabels := labelsForNode(node)
Expand Down Expand Up @@ -226,10 +267,48 @@ func (r *GardenerNodeLifecycleController) SetupWithManager(mgr ctrl.Manager, nam
_ = logger.FromContext(ctx)
r.namespace = namespace

hypervisorToNode := handler.EnqueueRequestForOwner(mgr.GetScheme(), mgr.GetRESTMapper(), &corev1.Node{})

// Maintenance=termination bumps generation; Evicting status changes do not.
hypervisorRelevantChange := predicate.Or(
predicate.GenerationChangedPredicate{},
evictingConditionChangedPredicate{},
)

return ctrl.NewControllerManagedBy(mgr).
Named(MaintenanceControllerName).
For(&corev1.Node{}).
Owns(&appsv1.Deployment{}). // trigger the r.Reconcile whenever an Own-ed deployment is created/updated/deleted
Watches(&kvmv1.Hypervisor{}, hypervisorToNode,
builder.WithPredicates(hypervisorRelevantChange),
).
Owns(&appsv1.Deployment{}).
Owns(&policyv1.PodDisruptionBudget{}).
Complete(r)
}

// evictingConditionChangedPredicate complements GenerationChangedPredicate,
// which ignores status-only updates.
type evictingConditionChangedPredicate struct {
predicate.Funcs
}

func (evictingConditionChangedPredicate) Update(e event.UpdateEvent) bool {
if e.ObjectOld == nil || e.ObjectNew == nil {
return false
}
oldHv, ok1 := e.ObjectOld.(*kvmv1.Hypervisor)
newHv, ok2 := e.ObjectNew.(*kvmv1.Hypervisor)
if !ok1 || !ok2 {
return false
}
oldCond := meta.FindStatusCondition(oldHv.Status.Conditions, kvmv1.ConditionTypeEvicting)
newCond := meta.FindStatusCondition(newHv.Status.Conditions, kvmv1.ConditionTypeEvicting)
switch {
case oldCond == nil && newCond == nil:
return false
case oldCond == nil || newCond == nil:
return true
default:
return oldCond.Status != newCond.Status
}
}
59 changes: 59 additions & 0 deletions internal/controller/gardener_node_lifecycle_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,4 +207,63 @@ var _ = Describe("Gardener Maintenance Controller", func() {
Expect(pdb.Spec.MinAvailable).To(HaveField("IntVal", BeNumerically("==", int32(0))))
})
})

Context("Offboarding taint", func() {
findOffboardingTaint := func(node *corev1.Node) *corev1.Taint {
for i := range node.Spec.Taints {
t := &node.Spec.Taints[i]
if t.Key == taintKeyOffboarding && t.Effect == corev1.TaintEffectNoExecute {
return t
}
}
return nil
}

When("the hypervisor is in maintenance termination and the VMs have been evicted", func() {
BeforeEach(func(ctx SpecContext) {
hypervisor := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, name, hypervisor)).To(Succeed())
hypervisor.Spec.Maintenance = kvmv1.MaintenanceTermination
Expect(k8sClient.Update(ctx, hypervisor)).To(Succeed())

meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeEvicting,
Status: metav1.ConditionFalse,
Reason: "Succeeded",
Message: "All VMs evicted",
})
Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
})

It("should add the offboarding NoExecute taint to the node", func(ctx SpecContext) {
_, err := controller.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())

node := &corev1.Node{}
Expect(k8sClient.Get(ctx, name, node)).To(Succeed())
taint := findOffboardingTaint(node)
Expect(taint).NotTo(BeNil())
Expect(taint.Key).To(Equal(taintKeyOffboarding))
Expect(taint.Effect).To(Equal(corev1.TaintEffectNoExecute))
})

It("should be idempotent: a second reconcile must not add a duplicate taint", func(ctx SpecContext) {
_, err := controller.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
_, err = controller.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())

node := &corev1.Node{}
Expect(k8sClient.Get(ctx, name, node)).To(Succeed())

count := 0
for _, t := range node.Spec.Taints {
if t.Key == taintKeyOffboarding {
count++
}
}
Expect(count).To(Equal(1))
})
})
})
})
Loading
Loading