@@ -19,6 +19,7 @@ package controller
1919import (
2020 "context"
2121 "fmt"
22+ "sigs.k8s.io/controller-runtime/pkg/handler"
2223 "time"
2324
2425 appsv1 "k8s.io/api/apps/v1"
@@ -27,8 +28,10 @@ import (
2728 apiequality "k8s.io/apimachinery/pkg/api/equality"
2829 apierrors "k8s.io/apimachinery/pkg/api/errors"
2930 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31+ "k8s.io/apimachinery/pkg/labels"
3032 "k8s.io/apimachinery/pkg/runtime"
3133 "k8s.io/klog/v2"
34+ "k8s.io/utils/ptr"
3235 ctrl "sigs.k8s.io/controller-runtime"
3336 "sigs.k8s.io/controller-runtime/pkg/builder"
3437 "sigs.k8s.io/controller-runtime/pkg/client"
@@ -60,7 +63,6 @@ type NUMAResourcesSchedulerReconciler struct {
6063 Scheme * runtime.Scheme
6164 SchedulerManifests schedmanifests.Manifests
6265 Namespace string
63- AutodetectReplicas int
6466}
6567
6668//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=*
@@ -126,6 +128,18 @@ func (r *NUMAResourcesSchedulerReconciler) SetupWithManager(mgr ctrl.Manager) er
126128 ! apiequality .Semantic .DeepEqual (e .ObjectNew .GetAnnotations (), e .ObjectOld .GetAnnotations ())
127129 },
128130 }
131+ nodesPredicate := predicate.Funcs {
132+ // we only care about cases when nodes are getting created or deleted
133+ CreateFunc : func (e event.TypedCreateEvent [client.Object ]) bool {
134+ return true
135+ },
136+ DeleteFunc : func (e event.TypedDeleteEvent [client.Object ]) bool {
137+ return true
138+ },
139+ UpdateFunc : func (e event.TypedUpdateEvent [client.Object ]) bool {
140+ return false
141+ },
142+ }
129143
130144 return ctrl .NewControllerManagedBy (mgr ).
131145 For (& nropv1.NUMAResourcesScheduler {}).
@@ -134,9 +148,25 @@ func (r *NUMAResourcesSchedulerReconciler) SetupWithManager(mgr ctrl.Manager) er
134148 Owns (& corev1.ServiceAccount {}, builder .WithPredicates (p )).
135149 Owns (& corev1.ConfigMap {}, builder .WithPredicates (p )).
136150 Owns (& appsv1.Deployment {}, builder .WithPredicates (p )).
151+ Watches (& corev1.Node {}, handler .EnqueueRequestsFromMapFunc (r .nodeToNUMAResourcesScheduler ),
152+ builder .WithPredicates (nodesPredicate )).
137153 Complete (r )
138154}
139155
156+ func (r * NUMAResourcesSchedulerReconciler ) nodeToNUMAResourcesScheduler (ctx context.Context , object client.Object ) []reconcile.Request {
157+ var requests []reconcile.Request
158+ nross := & nropv1.NUMAResourcesSchedulerList {}
159+ if err := r .List (ctx , nross ); err != nil {
160+ klog .ErrorS (err , "failed to List NUMAResourcesScheduler" )
161+ }
162+ for _ , instance := range nross .Items {
163+ requests = append (requests , reconcile.Request {NamespacedName : client.ObjectKey {
164+ Name : instance .Name ,
165+ }})
166+ }
167+ return requests
168+ }
169+
140170func (r * NUMAResourcesSchedulerReconciler ) reconcileResource (ctx context.Context , instance * nropv1.NUMAResourcesScheduler ) (reconcile.Result , string , error ) {
141171 schedStatus , err := r .syncNUMASchedulerResources (ctx , instance )
142172 if err != nil {
@@ -171,13 +201,22 @@ func isDeploymentRunning(ctx context.Context, c client.Client, key nropv1.Namesp
171201 return false , nil
172202}
173203
174- func (r * NUMAResourcesSchedulerReconciler ) computeSchedulerReplicas (schedSpec nropv1.NUMAResourcesSchedulerSpec ) * int32 {
175- // the api validation/normalization layer must ensure this value is != nil
176- if * schedSpec .Replicas >= 0 { // 0 is legit value to disable the deployment
177- return schedSpec .Replicas
204+ func (r * NUMAResourcesSchedulerReconciler ) computeSchedulerReplicas (ctx context.Context , schedSpec nropv1.NUMAResourcesSchedulerSpec ) (* int32 , error ) {
205+ // do not autodetect if explicitly set by the user
206+ if schedSpec .Replicas != nil {
207+ return schedSpec .Replicas , nil
208+ }
209+ nodeList := & corev1.NodeList {}
210+ if err := r .Client .List (ctx , nodeList , & client.ListOptions {
211+ LabelSelector : labels .SelectorFromSet (map [string ]string {
212+ "node-role.kubernetes.io/control-plane" : "" ,
213+ }),
214+ }); err != nil {
215+ return schedSpec .Replicas , err
178216 }
179- v := int32 (r .AutodetectReplicas )
180- return & v
217+ replicas := ptr .To (int32 (len (nodeList .Items )))
218+ klog .InfoS ("autodetect scheduler replicas" , "replicas" , * replicas )
219+ return replicas , nil
181220}
182221
183222func (r * NUMAResourcesSchedulerReconciler ) syncNUMASchedulerResources (ctx context.Context , instance * nropv1.NUMAResourcesScheduler ) (nropv1.NUMAResourcesSchedulerStatus , error ) {
@@ -186,6 +225,11 @@ func (r *NUMAResourcesSchedulerReconciler) syncNUMASchedulerResources(ctx contex
186225
187226 schedSpec := instance .Spec .Normalize ()
188227 cacheResyncPeriod := unpackAPIResyncPeriod (schedSpec .CacheResyncPeriod )
228+ replicas , err := r .computeSchedulerReplicas (ctx , schedSpec )
229+ if err != nil {
230+ return nropv1.NUMAResourcesSchedulerStatus {}, fmt .Errorf ("failed to compute scheduler replicas: %w" , err )
231+ }
232+ schedSpec .Replicas = replicas
189233 params := configParamsFromSchedSpec (schedSpec , cacheResyncPeriod , r .Namespace )
190234
191235 schedName , ok := schedstate .SchedulerNameFromObject (r .SchedulerManifests .ConfigMap )
@@ -207,7 +251,7 @@ func (r *NUMAResourcesSchedulerReconciler) syncNUMASchedulerResources(ctx contex
207251 },
208252 }
209253
210- r .SchedulerManifests .Deployment .Spec .Replicas = r . computeSchedulerReplicas ( schedSpec )
254+ r .SchedulerManifests .Deployment .Spec .Replicas = schedSpec . Replicas
211255 klog .V (4 ).InfoS ("using scheduler replicas" , "replicas" , * r .SchedulerManifests .Deployment .Spec .Replicas )
212256 // TODO: if replicas doesn't make sense (autodetect disabled and user set impossible value) then we
213257 // should set a degraded state
0 commit comments