Skip to content

Commit c2e260d

Browse files
committed
ctrl: sched: add topologySpreadConstraints to deployment
To spread the pods evenly across the nodes in a balanced way, taking the new ReplicaSet's pods into account during a rollout. Signed-off-by: Shereen Haj <shajmakh@redhat.com>
1 parent 59d695d commit c2e260d

5 files changed

Lines changed: 404 additions & 1 deletion

File tree

internal/controller/numaresourcesscheduler_controller.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,10 @@ func (r *NUMAResourcesSchedulerReconciler) syncNUMASchedulerResources(ctx contex
364364
return nropv1.NUMAResourcesSchedulerStatus{}, err
365365
}
366366

367+
if err := schedupdate.DeploymentTopologySpreadConstraints(r.SchedulerManifests.Deployment); err != nil {
368+
return nropv1.NUMAResourcesSchedulerStatus{}, err
369+
}
370+
367371
k8swgrbacupdate.RoleForLeaderElection(r.SchedulerManifests.Role, r.Namespace, nrosched.LeaderElectionResourceName)
368372
k8swgrbacupdate.RoleBinding(r.SchedulerManifests.RoleBinding, r.SchedulerManifests.ServiceAccount.Name, r.Namespace)
369373

internal/controller/numaresourcesscheduler_controller_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,28 @@ var _ = Describe("Test NUMAResourcesScheduler Reconcile", func() {
10301030
Expect(dp.Spec.Template.Spec.Containers[0].Args).To(ContainElement("--tls-min-version=" + updatedSettings.MinVersion))
10311031
Expect(dp.Spec.Template.Spec.Containers[0].Args).To(ContainElement("--tls-cipher-suites=" + updatedSettings.CipherSuites))
10321032
})
1033+
1034+
It("should set the TopologySpreadConstraints in the deployment by default", func() {
	ctx := context.TODO()

	// A plain reconcile, with no extra configuration, must already produce the constraint.
	_, reconcileErr := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: nrsKey})
	Expect(reconcileErr).ToNot(HaveOccurred())

	// Re-read the scheduler object: its status names the deployment to inspect.
	Expect(reconciler.Client.Get(ctx, nrsKey, nrs)).To(Succeed())
	deployment := &appsv1.Deployment{}
	deploymentKey := client.ObjectKey{
		Namespace: nrs.Status.Deployment.Namespace,
		Name:      nrs.Status.Deployment.Name,
	}
	Expect(reconciler.Client.Get(ctx, deploymentKey, deployment)).To(Succeed())

	podSpec := deployment.Spec.Template.Spec
	Expect(podSpec.TopologySpreadConstraints).To(HaveLen(1))

	// The selector is expected to mirror the pod template labels of the fetched deployment.
	want := corev1.TopologySpreadConstraint{
		MaxSkew:           1,
		TopologyKey:       "kubernetes.io/hostname",
		WhenUnsatisfiable: corev1.DoNotSchedule,
		MatchLabelKeys:    []string{"pod-template-hash"},
		LabelSelector: &metav1.LabelSelector{
			MatchLabels: deployment.Spec.Template.Labels,
		},
	}
	Expect(podSpec.TopologySpreadConstraints[0]).To(Equal(want))
})
10331055
})
10341056
})
10351057

pkg/objectupdate/sched/sched.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
appsv1 "k8s.io/api/apps/v1"
2323
corev1 "k8s.io/api/core/v1"
2424
"k8s.io/apimachinery/pkg/api/resource"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2526
"k8s.io/klog/v2"
2627

2728
"github.com/k8stopologyawareschedwg/deployer/pkg/flagcodec"
@@ -102,6 +103,28 @@ func DeploymentTLSSettings(dp *appsv1.Deployment, tlsSettings objtls.Settings) e
102103
return nil
103104
}
104105

106+
func DeploymentTopologySpreadConstraints(dp *appsv1.Deployment) error {
107+
labels := dp.Spec.Template.Labels
108+
if len(labels) == 0 {
109+
return fmt.Errorf("no labels found in deployment template")
110+
}
111+
112+
schedConstr := corev1.TopologySpreadConstraint{
113+
LabelSelector: &metav1.LabelSelector{
114+
MatchLabels: labels,
115+
},
116+
MaxSkew: 1,
117+
TopologyKey: "kubernetes.io/hostname",
118+
WhenUnsatisfiable: corev1.DoNotSchedule,
119+
// this is needed for safe rollouts to allow ignoring the old replicaset and calculates the spread
120+
// purely based on the new replicaset, ensuring the final state is perfectly balanced without stalling.
121+
MatchLabelKeys: []string{"pod-template-hash"},
122+
}
123+
124+
dp.Spec.Template.Spec.TopologySpreadConstraints = []corev1.TopologySpreadConstraint{schedConstr}
125+
klog.V(3).InfoS("scheduler deployment topology spread constraints", "constraints", schedConstr.String())
126+
return nil
127+
}
105128
func SchedulerConfig(cm *corev1.ConfigMap, name string, params *k8swgmanifests.ConfigParams) error {
106129
if cm.Data == nil {
107130
return fmt.Errorf("no data found in ConfigMap: %s/%s", cm.Namespace, cm.Name)

pkg/objectupdate/sched/sched_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import (
2424
"testing"
2525
"time"
2626

27+
"github.com/google/go-cmp/cmp"
28+
2729
appsv1 "k8s.io/api/apps/v1"
2830
corev1 "k8s.io/api/core/v1"
2931
"k8s.io/apimachinery/pkg/api/resource"
@@ -631,6 +633,43 @@ func TestDeploymentTLSSettingsRepeated(t *testing.T) {
631633
}
632634
}
633635

636+
func TestDeploymentTopologySpreadConstraintsNoLabels(t *testing.T) {
637+
dp := dpMinimal.DeepCopy()
638+
dp.Spec.Template.Labels = nil
639+
if err := DeploymentTopologySpreadConstraints(dp); err == nil {
640+
t.Fatalf("expected error but got nil")
641+
}
642+
643+
dp.Spec.Template.Labels = map[string]string{}
644+
if err := DeploymentTopologySpreadConstraints(dp); err == nil {
645+
t.Fatalf("expected error but got nil")
646+
}
647+
}
648+
649+
func TestDeploymentTopologySpreadConstraints(t *testing.T) {
650+
dp := dpMinimal.DeepCopy()
651+
dp.Spec.Template.Labels = map[string]string{
652+
"app": "numaresources-scheduler",
653+
}
654+
if err := DeploymentTopologySpreadConstraints(dp); err != nil {
655+
t.Fatalf("unexpected error: %v", err)
656+
}
657+
expectedConstraints := []corev1.TopologySpreadConstraint{
658+
{
659+
LabelSelector: &metav1.LabelSelector{
660+
MatchLabels: dp.Spec.Template.Labels,
661+
},
662+
MaxSkew: 1,
663+
TopologyKey: "kubernetes.io/hostname",
664+
WhenUnsatisfiable: corev1.DoNotSchedule,
665+
MatchLabelKeys: []string{"pod-template-hash"},
666+
},
667+
}
668+
if cmp.Diff(dp.Spec.Template.Spec.TopologySpreadConstraints, expectedConstraints) != "" {
669+
t.Errorf("constraints mismatch\ngot: %v\nexpected: %v", dp.Spec.Template.Spec.TopologySpreadConstraints, expectedConstraints)
670+
}
671+
}
672+
634673
func mustParseResource(t *testing.T, v string) resource.Quantity {
635674
t.Helper()
636675
qty, err := resource.ParseQuantity(v)

0 commit comments

Comments
 (0)