Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions internal/controller/numaresourcesscheduler_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,11 @@
if err := schedupdate.DeploymentTLSSettings(r.SchedulerManifests.Deployment, r.TLSSettings); err != nil {
return nropv1.NUMAResourcesSchedulerStatus{}, err
}

if err := schedupdate.DeploymentAffinityWithStrategy(r.SchedulerManifests.Deployment, instance.Spec); err != nil {

Check failure on line 363 in internal/controller/numaresourcesscheduler_controller.go

View workflow job for this annotation

GitHub Actions / golangci-lint

File is not properly formatted (gci)
if err := schedupdate.DeploymentTopologySpreadConstraints(r.SchedulerManifests.Deployment); err != nil {
return nropv1.NUMAResourcesSchedulerStatus{}, err
}

k8swgrbacupdate.RoleForLeaderElection(r.SchedulerManifests.Role, r.Namespace, nrosched.LeaderElectionResourceName)
k8swgrbacupdate.RoleBinding(r.SchedulerManifests.RoleBinding, r.SchedulerManifests.ServiceAccount.Name, r.Namespace)

Expand Down
188 changes: 0 additions & 188 deletions internal/controller/numaresourcesscheduler_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
Expand Down Expand Up @@ -1032,193 +1031,6 @@ var _ = Describe("Test NUMAResourcesScheduler Reconcile", func() {
Expect(dp.Spec.Template.Spec.Containers[0].Args).To(ContainElement("--tls-cipher-suites=" + updatedSettings.CipherSuites))
})
})

Context("when setting the scheduler PodAntiAffinity", func() {
var (
expectedPodAntiAff *corev1.PodAntiAffinity
expectedStrategy appsv1.DeploymentStrategy
)

BeforeEach(func() {
expectedPodAntiAff = &corev1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "secondary-scheduler",
},
},
TopologyKey: "kubernetes.io/hostname",
},
},
}
expectedStrategy = appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{
MaxUnavailable: ptr.To(intstr.FromInt(1)),
MaxSurge: ptr.To(intstr.FromInt(0)),
},
}
})

Context("set different replicas counts", func() {
type testCase struct {
replicasCount *int32
expectPodAntiAffinity bool
}

DescribeTable("should set PodAntiAffinity for scheduler Deployment according to replicas count", func(ctx context.Context, tc testCase) {
nrs := testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
nrs.Spec.Replicas = tc.replicasCount
initObjects := []runtime.Object{nrs}
initObjects = append(initObjects, fakeNodes(3, 0)...)
reconciler, err := NewFakeNUMAResourcesSchedulerReconciler(initObjects...)
Expect(err).ToNot(HaveOccurred())
// Baseline node affinity comes from the scheduler deployment manifest; reconcile must preserve it.
Expect(reconciler.SchedulerManifests.Deployment.Spec.Template.Spec.Affinity).ToNot(BeNil())
expectedNodeAff := reconciler.SchedulerManifests.Deployment.Spec.Template.Spec.Affinity.NodeAffinity.DeepCopy()

key := client.ObjectKeyFromObject(nrs)
_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())

dp := &appsv1.Deployment{}
dpKey := client.ObjectKey{Namespace: testNamespace, Name: "secondary-scheduler"}
Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity := dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.NodeAffinity).To(Equal(expectedNodeAff))

if !tc.expectPodAntiAffinity {
Expect(affinity.PodAntiAffinity).To(BeNil())
Expect(dp.Spec.Strategy).To(Equal(appsv1.DeploymentStrategy{}))

// no changes expected on second reconcile with same input
_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity = dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.NodeAffinity).To(Equal(expectedNodeAff))
Expect(affinity.PodAntiAffinity).To(BeNil())
Expect(dp.Spec.Strategy).To(Equal(appsv1.DeploymentStrategy{}))
return
}

Expect(affinity.PodAntiAffinity).ToNot(BeNil())
Expect(affinity.PodAntiAffinity).To(Equal(expectedPodAntiAff))
Expect(dp.Spec.Strategy).To(Equal(expectedStrategy))

// no changes expected on second reconcile with same input
_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity = dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.NodeAffinity).To(Equal(expectedNodeAff))
Expect(affinity.PodAntiAffinity).ToNot(BeNil())
Expect(affinity.PodAntiAffinity).To(Equal(expectedPodAntiAff))
Expect(dp.Spec.Strategy).To(Equal(expectedStrategy))
},
Entry("when replicas are not set, expected podAntiAffinity", testCase{expectPodAntiAffinity: true}),
Entry("when replicas are set and zero, no podAntiAffinity is set", testCase{replicasCount: ptr.To(int32(0)), expectPodAntiAffinity: false}),
Entry("when replicas are set and non-zero, no podAntiAffinity is set", testCase{replicasCount: ptr.To(int32(1)), expectPodAntiAffinity: false}),
)
})

Context("switch between replicas counts", func() {
It("should reset podAntiAffinity and strategy - transition from autodetect to explicit replicas count", func(ctx context.Context) {
nrs := testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
nrs.Spec.Replicas = nil // autodetect
initObjects := []runtime.Object{nrs}
initObjects = append(initObjects, fakeNodes(3, 0)...)
reconciler, err := NewFakeNUMAResourcesSchedulerReconciler(initObjects...)
Expect(err).ToNot(HaveOccurred())

key := client.ObjectKeyFromObject(nrs)
_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())

dp := &appsv1.Deployment{}
dpKey := client.ObjectKey{Namespace: testNamespace, Name: "secondary-scheduler"}

Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity := dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.PodAntiAffinity).To(Equal(expectedPodAntiAff))
Expect(dp.Spec.Strategy).To(Equal(expectedStrategy))

// reconcile with non-nil replicas count
Eventually(func(g Gomega) {
g.Expect(reconciler.Client.Get(ctx, key, nrs)).ToNot(HaveOccurred())
nrs.Spec.Replicas = ptr.To(int32(2))
g.Expect(reconciler.Client.Update(ctx, nrs)).ToNot(HaveOccurred())
}).WithPolling(5 * time.Second).WithTimeout(30 * time.Second).Should(Succeed())

_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())

Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity = dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.PodAntiAffinity).To(BeNil())
Expect(dp.Spec.Strategy).To(Equal(appsv1.DeploymentStrategy{}))

// reconcile with a different non-nil replicas count should keep the same result
Eventually(func(g Gomega) {
g.Expect(reconciler.Client.Get(ctx, key, nrs)).ToNot(HaveOccurred())
nrs.Spec.Replicas = ptr.To(int32(0))
g.Expect(reconciler.Client.Update(ctx, nrs)).ToNot(HaveOccurred())
}).WithPolling(5 * time.Second).WithTimeout(30 * time.Second).Should(Succeed())

_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())

Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity = dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.PodAntiAffinity).To(BeNil())
Expect(dp.Spec.Strategy).To(Equal(appsv1.DeploymentStrategy{}))

})

It("should reset podAntiAffinity and strategy - transition from explicit replicas count to autodetect", func(ctx context.Context) {
nrs := testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
nrs.Spec.Replicas = ptr.To(int32(3))
initObjects := []runtime.Object{nrs}
initObjects = append(initObjects, fakeNodes(3, 0)...)
reconciler, err := NewFakeNUMAResourcesSchedulerReconciler(initObjects...)
Expect(err).ToNot(HaveOccurred())

key := client.ObjectKeyFromObject(nrs)
_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())

dp := &appsv1.Deployment{}
dpKey := client.ObjectKey{Namespace: testNamespace, Name: "secondary-scheduler"}
Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity := dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.PodAntiAffinity).To(BeNil())
Expect(dp.Spec.Strategy).To(Equal(appsv1.DeploymentStrategy{}))

Eventually(func(g Gomega) {
g.Expect(reconciler.Client.Get(ctx, key, nrs)).ToNot(HaveOccurred())
nrs.Spec.Replicas = nil // autodetect
g.Expect(reconciler.Client.Update(ctx, nrs)).ToNot(HaveOccurred())
}).WithPolling(5 * time.Second).WithTimeout(30 * time.Second).Should(Succeed())

_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())

Expect(reconciler.Client.Get(ctx, dpKey, dp)).To(Succeed())
affinity = dp.Spec.Template.Spec.Affinity
Expect(affinity).ToNot(BeNil())
Expect(affinity.PodAntiAffinity).To(Equal(expectedPodAntiAff))
Expect(dp.Spec.Strategy).To(Equal(expectedStrategy))
})
})
})
})

var _ = Describe("Test computeSchedulerReplicas", func() {
Expand Down
66 changes: 23 additions & 43 deletions pkg/objectupdate/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,13 @@
package sched

import (
"errors"
"fmt"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"

"github.com/k8stopologyawareschedwg/deployer/pkg/flagcodec"
k8swgmanifests "github.com/k8stopologyawareschedwg/deployer/pkg/manifests"
Expand Down Expand Up @@ -106,52 +103,35 @@ func DeploymentTLSSettings(dp *appsv1.Deployment, tlsSettings objtls.Settings) e
return nil
}

// DeploymentAffinityWithStrategy configures required pod anti-affinity on the scheduler
// deployment only when the CR leaves replica count to autodetection (Replicas unset).
// An explicit non-nil Replicas value means the user chose a replica count and keeps
// full control (no required pod anti-affinity).
func DeploymentAffinityWithStrategy(dp *appsv1.Deployment, spec nropv1.NUMAResourcesSchedulerSpec) error {
if spec.Replicas != nil {
if dp.Spec.Template.Spec.Affinity != nil {
dp.Spec.Template.Spec.Affinity.PodAntiAffinity = nil
}
dp.Spec.Strategy = appsv1.DeploymentStrategy{}
return nil
}

func DeploymentTopologySpreadConstraints(dp *appsv1.Deployment) error {
labels := dp.Spec.Template.Labels
if len(labels) == 0 {
return errors.New("no labels provided for PodAntiAffinity")
}

if dp.Spec.Template.Spec.Affinity == nil {
dp.Spec.Template.Spec.Affinity = &corev1.Affinity{}
}

dp.Spec.Template.Spec.Affinity.PodAntiAffinity = &corev1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
TopologyKey: "kubernetes.io/hostname",
if labels == nil {
labels = map[string]string{}
}
/*
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: my-app-2
matchLabelKeys:
- pod-template-hash
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
*/
dp.Spec.Template.Spec.TopologySpreadConstraints = []corev1.TopologySpreadConstraint{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
MaxSkew: 1,
TopologyKey: "kubernetes.io/hostname",
WhenUnsatisfiable: corev1.DoNotSchedule,
MatchLabelKeys: []string{"pod-template-hash"},
},
}
klog.V(3).InfoS("Scheduler Deployment affinity", "podAntiAffinity", dp.Spec.Template.Spec.Affinity.PodAntiAffinity.String())
// NOTE: With replicas=1 and no PodAntiAffinity, the default strategy (surge 1, then remove old)
// works as expected — the new pod can schedule anywhere, no constraints block it.
dp.Spec.Strategy = appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{
MaxUnavailable: ptr.To(intstr.FromInt(1)),
MaxSurge: ptr.To(intstr.FromInt(0)),
},
}
klog.V(3).InfoS("Scheduler Deployment Rollout Strategy", "rolloutStrategy", dp.Spec.Strategy.String())
return nil
}

func SchedulerConfig(cm *corev1.ConfigMap, name string, params *k8swgmanifests.ConfigParams) error {
if cm.Data == nil {
return fmt.Errorf("no data found in ConfigMap: %s/%s", cm.Namespace, cm.Name)
Expand Down
Loading
Loading