Skip to content

Commit ff0a97c

Browse files
pkg/clusteragent/autoscaling/cluster/spot: wait for tracker to settle before advancing fake clock
Test scenarios use a fake clock to speed up on-demand fallback and rebalancing. The rebalancer skips an iteration when there are pending pods or in-flight admissions; therefore, update tests to step the fake clock only after workload updates have settled. Also refactor manual pod creation/deletion into fakeDeployment Reconcile and ScaleDown methods. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 34e34bb commit ff0a97c

4 files changed

Lines changed: 136 additions & 77 deletions

File tree

pkg/clusteragent/autoscaling/cluster/spot/cluster_test.go

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ type fakeDeployment struct {
7777
name string
7878
existingReplicaSet string
7979
podSelector map[string]string
80+
replicas int
8081
}
8182

8283
// newFakeCluster creates a fakeCluster.
@@ -135,8 +136,8 @@ func (c *fakeCluster) OnPodDeleted(hook deletionHook) {
135136
c.podDeletedHooks = append(c.podDeletedHooks, hook)
136137
}
137138

138-
// CreatePod runs all registered admission hooks on the pod then creates it as Pending.
139-
func (c *fakeCluster) CreatePod(pod *corev1.Pod) {
139+
// createPod runs all registered admission hooks on the pod then creates it as Pending.
140+
func (c *fakeCluster) createPod(pod *corev1.Pod) {
140141
unmodifiedCopy := pod.DeepCopy()
141142
for _, hook := range c.podCreatedHooks {
142143
updated, err := hook(pod)
@@ -417,16 +418,61 @@ func (d *fakeDeployment) rolloutWithDelay(replicas int) string {
417418
// A new ReplicaSet created
418419
newReplicaSet := replicaSetName(d.name)
419420
for range replicas {
420-
d.cluster.CreatePod(newPod(d.namespace, kubernetes.ReplicaSetKind, newReplicaSet, d.podSelector))
421+
d.cluster.createPod(newPod(d.namespace, kubernetes.ReplicaSetKind, newReplicaSet, d.podSelector))
421422
}
422423
// Existing ReplicaSet is scaled down
423424
if d.existingReplicaSet != "" {
424425
d.cluster.DeleteOwnerPods(kubernetes.ReplicaSetKind, d.namespace, d.existingReplicaSet)
425426
}
426427
d.existingReplicaSet = newReplicaSet
428+
d.replicas = replicas
427429
return newReplicaSet
428430
}
429431

432+
// Reconcile creates pods to bring the current ReplicaSet back to d.replicas,
433+
// counting existing non-terminal pods and creating only the missing number.
434+
func (d *fakeDeployment) Reconcile() {
435+
if d.existingReplicaSet == "" {
436+
return
437+
}
438+
pods := d.cluster.ListOwnerPods(kubernetes.ReplicaSetKind, d.namespace, d.existingReplicaSet)
439+
active := 0
440+
for _, pod := range pods {
441+
phase := corev1.PodPhase(pod.Phase)
442+
if phase != corev1.PodSucceeded && phase != corev1.PodFailed {
443+
active++
444+
}
445+
}
446+
for range max(0, d.replicas-active) {
447+
d.cluster.createPod(newPod(d.namespace, kubernetes.ReplicaSetKind, d.existingReplicaSet, d.podSelector))
448+
}
449+
}
450+
451+
// ScaleDown deletes pods selected by deleteFilter and updates the replica count to the number of remaining pods.
452+
func (d *fakeDeployment) ScaleDown(deleteFilter func([]*workloadmeta.KubernetesPod) []*workloadmeta.KubernetesPod) {
453+
t := d.cluster.t
454+
t.Helper()
455+
rs := d.ReplicaSet()
456+
pods := d.cluster.ListOwnerPods(kubernetes.ReplicaSetKind, d.namespace, rs)
457+
toDelete := deleteFilter(pods)
458+
459+
d.replicas = len(pods) - len(toDelete)
460+
461+
deleted := make(map[string]struct{}, len(toDelete))
462+
for _, pod := range toDelete {
463+
d.cluster.DeletePod(pod)
464+
deleted[pod.ID] = struct{}{}
465+
}
466+
require.Eventually(t, func() bool {
467+
for _, pod := range d.cluster.ListOwnerPods(kubernetes.ReplicaSetKind, d.namespace, rs) {
468+
if _, ok := deleted[pod.ID]; ok {
469+
return false
470+
}
471+
}
472+
return true
473+
}, 5*time.Second, 100*time.Millisecond)
474+
}
475+
430476
func async(f func(workloadmeta.Entity), e workloadmeta.Entity) {
431477
go func() {
432478
time.Sleep(time.Duration(10+rand.N(40)) * time.Millisecond)

pkg/clusteragent/autoscaling/cluster/spot/pod_tracker.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ func (ps *ownerPodSet) track(uid string, isSpot bool, info podInfo, now time.Tim
306306
// getPodToDelete returns the uid and name of a pod to delete to make progress toward the desired config.
307307
// It returns empty strings if no deletion is needed.
308308
func (ps *ownerPodSet) getPodToDelete(lastUpdatedBefore time.Time) (string, string) {
309-
if ps.admissionSpotCount > 0 || ps.admissionOnDemandCount > 0 {
309+
if ps.hasAdmissions() {
310310
return "", ""
311311
}
312312

@@ -340,6 +340,11 @@ func (ps *ownerPodSet) getPodToDelete(lastUpdatedBefore time.Time) (string, stri
340340
return "", ""
341341
}
342342

343+
// hasAdmissions returns true if pod set has admitted but not yet tracked pods.
344+
func (ps *ownerPodSet) hasAdmissions() bool {
345+
return ps.admissionSpotCount > 0 || ps.admissionOnDemandCount > 0
346+
}
347+
343348
// hasPending returns true if any tracked pod is in PodPending phase.
344349
func (ps *ownerPodSet) hasPending() bool {
345350
const pending = string(corev1.PodPending)

pkg/clusteragent/autoscaling/cluster/spot/scheduler_test.go

Lines changed: 63 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -208,34 +208,32 @@ func TestScenarios(t *testing.T) {
208208

209209
// When
210210
// ReplicaSet recreates pods
211-
for range 6 {
212-
cluster.CreatePod(newPod("default", kubernetes.ReplicaSetKind, rs, nil))
213-
}
211+
d.Reconcile()
214212

215213
// Then
216214
// Fallback to on-demand
217215
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(10))
218216

219217
// When
220218
cluster.AddSpotNode("new-spot")
221-
// Advance past disabled interval to re-enable spot scheduling
222-
clk.Step(s.Config().FallbackDuration)
219+
220+
// Advance past disabled interval to re-enable spot scheduling.
221+
stepClockAfterUpdatesSettled(t, s, clk, s.Config().FallbackDuration, "apps", kubernetes.DeploymentKind, d.namespace, d.name)
222+
223223
requireEventually(t, func() bool {
224224
return !s.IsSpotSchedulingDisabled("apps", kubernetes.DeploymentKind, d.namespace, d.name)
225225
})
226226

227227
// Rebalancing
228228
for i := range 6 {
229229
// When
230-
clk.Step(s.Config().RebalanceStabilizationPeriod)
230+
stepClockAfterUpdatesSettled(t, s, clk, s.Config().RebalanceStabilizationPeriod, "apps", kubernetes.DeploymentKind, d.namespace, d.name)
231231

232232
// Then: excess on-demand pod evicted
233233
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(10-1-i))
234234

235235
// ReplicaSet recreates pod
236-
cluster.CreatePod(newPod("default", kubernetes.ReplicaSetKind, rs, nil))
237-
// Important: wait for it to be Running before next step
238-
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(i+1))
236+
d.Reconcile()
239237
}
240238

241239
// Then
@@ -275,13 +273,12 @@ func TestScenarios(t *testing.T) {
275273

276274
cluster.DeletePod(pod)
277275
deleted[pod.ID] = struct{}{}
278-
279-
cluster.CreatePod(newPod("default", kubernetes.ReplicaSetKind, rs, nil))
280276
}
281-
282277
// Important: wait until deletion is complete before checking expectations to avoid counting deleted pods.
283278
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectHasNoneOf(deleted))
284279

280+
d.Reconcile()
281+
285282
// Then
286283
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(expectedSpot))
287284
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(expectedOnDemand))
@@ -302,14 +299,15 @@ func TestScenarios(t *testing.T) {
302299
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(4))
303300

304301
// When: scale down to 5 replicas leaving 2 spot / 3 on-demand — ratio is off
305-
scaleDown(t, cluster, kubernetes.ReplicaSetKind, "default", rs, 2, 3)
302+
d.ScaleDown(keep(2, 3))
303+
304+
stepClockAfterUpdatesSettled(t, s, clk, s.Config().RebalanceStabilizationPeriod, "apps", kubernetes.DeploymentKind, d.namespace, d.name)
306305

307-
// When: rebalancing evicts the excess on-demand pod
308-
clk.Step(s.Config().RebalanceStabilizationPeriod)
306+
// Then: excess on-demand pod evicted
309307
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(2))
310308

311309
// ReplicaSet recreates the evicted pod as spot
312-
cluster.CreatePod(newPod("default", kubernetes.ReplicaSetKind, rs, nil))
310+
d.Reconcile()
313311

314312
// Then: 3 spot / 2 on-demand (60% of 5, minOnDemand=2 satisfied)
315313
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(3))
@@ -330,17 +328,19 @@ func TestScenarios(t *testing.T) {
330328
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(4))
331329

332330
// When: scale down to 5 replicas leaving 5 spot / 0 on-demand — on-demand count is below minOnDemand=2
333-
scaleDown(t, cluster, kubernetes.ReplicaSetKind, "default", rs, 5, 0)
331+
d.ScaleDown(keep(5, 0))
334332

335333
// Rebalancing evicts spot pods until on-demand count reaches minOnDemand=2.
336334
// Each evicted spot pod is recreated by the ReplicaSet as on-demand.
337335
for i := range 2 {
338-
clk.Step(s.Config().RebalanceStabilizationPeriod)
336+
// When
337+
stepClockAfterUpdatesSettled(t, s, clk, s.Config().RebalanceStabilizationPeriod, "apps", kubernetes.DeploymentKind, d.namespace, d.name)
338+
339+
// Then: excess spot pod evicted
339340
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(4-i))
340341

341342
// ReplicaSet recreates the evicted pod as on-demand (on-demand count still below minOnDemand)
342-
cluster.CreatePod(newPod("default", kubernetes.ReplicaSetKind, rs, nil))
343-
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(i+1))
343+
d.Reconcile()
344344
}
345345

346346
// Then: 3 spot / 2 on-demand (60% of 5, minOnDemand=2 satisfied)
@@ -364,14 +364,15 @@ func TestScenarios(t *testing.T) {
364364

365365
// When: scale down to 5 replicas leaving 4 spot / 1 on-demand — on-demand satisfies minOnDemand=1
366366
// but spot count exceeds the desired 3 (60% of 5).
367-
scaleDown(t, cluster, kubernetes.ReplicaSetKind, "default", rs, 4, 1)
367+
d.ScaleDown(keep(4, 1))
368368

369-
// When: rebalancing evicts the excess spot pod
370-
clk.Step(s.Config().RebalanceStabilizationPeriod)
369+
stepClockAfterUpdatesSettled(t, s, clk, s.Config().RebalanceStabilizationPeriod, "apps", kubernetes.DeploymentKind, d.namespace, d.name)
370+
371+
// Then: excess spot pod evicted
371372
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(3))
372373

373374
// ReplicaSet recreates the evicted pod as on-demand
374-
cluster.CreatePod(newPod("default", kubernetes.ReplicaSetKind, rs, nil))
375+
d.Reconcile()
375376

376377
// Then: 3 spot / 2 on-demand (60% of 5, minOnDemand=1 satisfied)
377378
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(3))
@@ -392,10 +393,11 @@ func TestScenarios(t *testing.T) {
392393
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(4))
393394

394395
// When: scale down to 5 replicas preserving the ratio — 3 spot / 2 on-demand (60% of 5)
395-
scaleDown(t, cluster, kubernetes.ReplicaSetKind, "default", rs, 3, 2)
396+
d.ScaleDown(keep(3, 2))
397+
398+
stepClockAfterUpdatesSettled(t, s, clk, s.Config().RebalanceStabilizationPeriod, "apps", kubernetes.DeploymentKind, d.namespace, d.name)
396399

397400
// Then: ratio is already correct; rebalancing does not evict any pod
398-
clk.Step(s.Config().RebalanceStabilizationPeriod)
399401
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(3))
400402
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(2))
401403
})
@@ -444,10 +446,13 @@ func TestScenarios(t *testing.T) {
444446

445447
// Then: rebalancer evicts one on-demand pod per cycle; RS recreates it as spot.
446448
for i := range expectedSpot {
447-
clk.Step(s.Config().RebalanceStabilizationPeriod)
449+
// When
450+
stepClockAfterUpdatesSettled(t, s, clk, s.Config().RebalanceStabilizationPeriod, "apps", kubernetes.DeploymentKind, d.namespace, d.name)
451+
452+
// Then: excess on-demand pod evicted
448453
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningOnDemand(replicas-1-i))
449-
cluster.CreatePod(newPod("default", kubernetes.ReplicaSetKind, rs, nil))
450-
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(i+1))
454+
455+
d.Reconcile()
451456
}
452457

453458
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectRunningSpot(expectedSpot))
@@ -537,15 +542,6 @@ func TestScenarios(t *testing.T) {
537542
total, spot := s.TrackedCounts("apps", kubernetes.DeploymentKind, d.namespace, d.name)
538543
assert.Zero(t, total)
539544
assert.Zero(t, spot)
540-
541-
// When
542-
deleted := make(map[string]struct{}, 5)
543-
for _, pod := range cluster.ListOwnerPods(kubernetes.ReplicaSetKind, "default", rs) {
544-
cluster.DeletePod(pod)
545-
deleted[pod.ID] = struct{}{}
546-
}
547-
// Then
548-
requireOwnerPods(cluster, kubernetes.ReplicaSetKind, "default", rs, expectHasNoneOf(deleted))
549545
})
550546

551547
t.Run("Restarted scheduler tracks existing pods", func(t *testing.T) {
@@ -579,45 +575,37 @@ func TestScenarios(t *testing.T) {
579575
})
580576
}
581577

582-
// scaleDown simulates a Deployment scale-down by deleting pods to reach the expected spot/on-demand counts.
583-
func scaleDown(t *testing.T, cluster *fakeCluster, ownerKind, namespace, name string, expectSpot, expectOnDemand int) {
584-
t.Helper()
585-
pods := cluster.ListOwnerPods(ownerKind, namespace, name)
586-
587-
currentSpot, currentOnDemand := 0, 0
588-
for _, pod := range pods {
589-
if spot.IsSpotAssigned(pod) {
590-
currentSpot++
591-
} else {
592-
currentOnDemand++
593-
}
594-
}
595-
596-
require.GreaterOrEqual(t, currentSpot, expectSpot, "expectSpot=%d exceeds current spot count %d", expectSpot, currentSpot)
597-
require.GreaterOrEqual(t, currentOnDemand, expectOnDemand, "expectOnDemand=%d exceeds current on-demand count %d", expectOnDemand, currentOnDemand)
598-
599-
spotToDelete := currentSpot - expectSpot
600-
onDemandToDelete := currentOnDemand - expectOnDemand
601-
deleted := make(map[string]struct{})
602-
for _, pod := range pods {
603-
if spot.IsSpotAssigned(pod) {
604-
if spotToDelete > 0 {
605-
cluster.DeletePod(pod)
606-
deleted[pod.ID] = struct{}{}
607-
spotToDelete--
608-
}
609-
} else {
610-
if onDemandToDelete > 0 {
611-
cluster.DeletePod(pod)
612-
deleted[pod.ID] = struct{}{}
613-
onDemandToDelete--
578+
// keep returns a filter that retains given number of spot and on-demand pods.
579+
func keep(spotCount, onDemandCount int) func([]*workloadmeta.KubernetesPod) []*workloadmeta.KubernetesPod {
580+
return func(pods []*workloadmeta.KubernetesPod) []*workloadmeta.KubernetesPod {
581+
var toDelete []*workloadmeta.KubernetesPod
582+
for _, pod := range pods {
583+
if spot.IsSpotAssigned(pod) {
584+
if spotCount > 0 {
585+
spotCount--
586+
} else {
587+
toDelete = append(toDelete, pod)
588+
}
589+
} else {
590+
if onDemandCount > 0 {
591+
onDemandCount--
592+
} else {
593+
toDelete = append(toDelete, pod)
594+
}
614595
}
615596
}
597+
return toDelete
616598
}
599+
}
617600

618-
requireOwnerPods(cluster, ownerKind, namespace, name, expectHasNoneOf(deleted))
619-
requireOwnerPods(cluster, ownerKind, namespace, name, expectRunningSpot(expectSpot))
620-
requireOwnerPods(cluster, ownerKind, namespace, name, expectRunningOnDemand(expectOnDemand))
601+
// stepClockAfterUpdatesSettled waits for the pod tracker to have no in-flight admissions or pending pods
602+
// for the given workload, then advances the fake clock by duration.
603+
func stepClockAfterUpdatesSettled(t *testing.T, s *spot.TestScheduler, clk *clocktesting.FakeClock, duration time.Duration, group, kind, namespace, name string) {
604+
t.Helper()
605+
requireEventually(t, func() bool {
606+
return !s.HasAdmissionsOrPending(group, kind, namespace, name)
607+
})
608+
clk.Step(duration)
621609
}
622610

623611
func requireEventually(t *testing.T, condition func() bool, msgAndArgs ...any) {
@@ -647,6 +635,8 @@ func (h *spewStringer[T]) String() string {
647635

648636
// requireOwnerPods checks that all pods owned by ownerKind/namespace/ownerName eventually satisfy check.
649637
func requireOwnerPods(c *fakeCluster, ownerKind, namespace, ownerName string, check func(wlm []*workloadmeta.KubernetesPod) bool) {
638+
c.T().Helper()
639+
650640
pods := new(spewStringer[[]*workloadmeta.KubernetesPod])
651641
requireEventually(c.T(), func() bool {
652642
return check(pods.set(c.ListOwnerPods(ownerKind, namespace, ownerName)))

pkg/clusteragent/autoscaling/cluster/spot/testing_helpers.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,24 @@ func NewTestScheduler(config Config, clk clock.WithTicker, wlm workloadmeta.Comp
3535
return newScheduler(config, clk, wlm, evictorFunc, patcherFunc, dynamicClient, newWLMPodLister(wlm), isLeader)
3636
}
3737

38+
// HasAdmissionsOrPending reports whether the pod tracker has any in-flight admissions or pending pods
39+
// for the given workload. Used in tests to wait for pod tracker updates to propagate before advancing the clock.
40+
func (s *TestScheduler) HasAdmissionsOrPending(group, kind, namespace, name string) bool {
41+
s.tracker.mu.RLock()
42+
defer s.tracker.mu.RUnlock()
43+
w := objectRef{Group: group, Kind: kind, Namespace: namespace, Name: name}
44+
owners, ok := s.tracker.podSets[w]
45+
if !ok {
46+
return true
47+
}
48+
for _, ps := range owners {
49+
if ps.hasAdmissions() || ps.hasPending() {
50+
return true
51+
}
52+
}
53+
return false
54+
}
55+
3856
// TrackedCounts returns the total and spot tracked pod counts (including in-flight admissions) for the given workload.
3957
func (s *TestScheduler) TrackedCounts(group, kind, namespace, name string) (total, spot int) {
4058
s.tracker.mu.RLock()

0 commit comments

Comments
 (0)