Skip to content

Commit 56f2ed0

Browse files
author
longsuizhi
committed
fix(allocator): address CR feedback - exclude released and terminal pods from live allocation
- P1: Released pods no longer count toward liveAllocated, preventing stale released entries from blocking subsequent re-allocations. - P2: Only Running+Ready pods are added to livePodSet, so terminal pods (Failed/Evicted) that still have their object present also trigger re-allocation.
1 parent d4b8e95 commit 56f2ed0

2 files changed

Lines changed: 75 additions & 10 deletions

File tree

kubernetes/internal/controller/allocator.go

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -461,9 +461,14 @@ func (allocator *defaultAllocator) Schedule(ctx context.Context, spec *AllocSpec
461461
}
462462

463463
// Build a set of live pool pod names for dead-pod detection during allocation requests.
464+
// Running pods (regardless of readiness) are considered live to avoid churn from transient
465+
// readiness probe flaps. Only truly terminal pods (Failed/Succeeded) or deleted pods
466+
// should trigger re-allocation.
464467
livePodSet := make(map[string]struct{}, len(spec.Pods))
465468
for _, p := range spec.Pods {
466-
livePodSet[p.Name] = struct{}{}
469+
if p.Status.Phase == corev1.PodRunning || p.Status.Phase == corev1.PodPending {
470+
livePodSet[p.Name] = struct{}{}
471+
}
467472
}
468473

469474
// Fetch pool allocation once and reuse it for both stale-sandbox cleanup and available-pod filtering.
@@ -545,21 +550,26 @@ func (allocator *defaultAllocator) getSandboxRequest(ctx context.Context, sandbo
545550
releasedSet[r] = struct{}{}
546551
}
547552

548-
// Filter out pods that no longer exist in the pool (e.g. externally deleted).
549-
// Dead pods are treated as released so the sandbox can receive replacement allocations.
553+
// Filter out pods that no longer exist in the pool (e.g. externally deleted) and
554+
// pods that have been released AND are no longer live. Only unreleased live pods
555+
// should count toward the effective allocation when computing supplement.
556+
// Released pods that are still alive keep their slot to avoid premature supplement
557+
// (the recycle flow will eventually remove them from alloc-status).
550558
liveAllocated := make([]string, 0, len(allocated))
551559
deadPods := make([]string, 0)
552560
for _, p := range allocated {
553-
if _, exists := releasedSet[p]; exists {
554-
// Already released, keep in allocated for bookkeeping consistency.
555-
liveAllocated = append(liveAllocated, p)
561+
_, isReleased := releasedSet[p]
562+
_, isAlive := livePodSet[p]
563+
if isReleased && !isAlive {
564+
// Released and gone — no longer occupies a slot.
556565
continue
557566
}
558-
if _, alive := livePodSet[p]; alive {
559-
liveAllocated = append(liveAllocated, p)
560-
} else {
567+
if !isReleased && !isAlive {
568+
// Not released but gone — externally deleted, needs re-allocation.
561569
deadPods = append(deadPods, p)
570+
continue
562571
}
572+
liveAllocated = append(liveAllocated, p)
563573
}
564574
if len(deadPods) > 0 {
565575
log.Info("Detected dead allocated pods, queuing for release to trigger re-allocation",
@@ -594,13 +604,20 @@ func (allocator *defaultAllocator) getSandboxRequest(ctx context.Context, sandbo
594604
}
595605

596606
toRelease := make([]string, 0)
607+
toReleaseSet := make(map[string]struct{})
597608
for _, r := range release {
598609
if _, exists := releasedSet[r]; !exists {
599610
toRelease = append(toRelease, r)
611+
toReleaseSet[r] = struct{}{}
600612
}
601613
}
602614
// Also queue dead pods for release so their allocation records are cleaned up.
603-
toRelease = append(toRelease, deadPods...)
615+
// Deduplicate against pods already in alloc-release to avoid duplicate recycle operations.
616+
for _, dp := range deadPods {
617+
if _, exists := toReleaseSet[dp]; !exists {
618+
toRelease = append(toRelease, dp)
619+
}
620+
}
604621

605622
replica := int32(0)
606623
if sandbox.Spec.Replicas != nil {

kubernetes/internal/controller/allocator_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,54 @@ func TestSchedule(t *testing.T) {
259259
PodSupplement: 0,
260260
},
261261
},
262+
{
263+
name: "released pod does not block second re-allocation",
264+
spec: &AllocSpec{
265+
Pods: []*corev1.Pod{
266+
// pod1 was previously allocated and released (recycled).
267+
// pod2 was the replacement but is now also deleted.
268+
// pod3 is a new available pod.
269+
{ObjectMeta: metav1.ObjectMeta{Name: "pod3"}, Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}},
270+
},
271+
Pool: &sandboxv1alpha1.Pool{ObjectMeta: metav1.ObjectMeta{Name: "pool1"}},
272+
Sandboxes: []*sandboxv1alpha1.BatchSandbox{
273+
{ObjectMeta: metav1.ObjectMeta{Name: "sbx1"}, Spec: sandboxv1alpha1.BatchSandboxSpec{Replicas: &replica1}},
274+
},
275+
},
276+
poolAlloc: &PoolAllocation{PodAllocation: map[string]string{"pod1": "sbx1", "pod2": "sbx1"}},
277+
sandboxAllocs: map[string]*SandboxAllocation{"sbx1": {Pods: []string{"pod1", "pod2"}}},
278+
releases: map[string]*AllocationRelease{"sbx1": {Pods: []string{}}},
279+
released: map[string]*AllocationReleased{"sbx1": {Pods: []string{"pod1"}}},
280+
wantAction: &algorithm.AllocAction{
281+
ToAllocate: map[string][]string{"sbx1": {"pod3"}},
282+
ToRelease: map[string][]string{"sbx1": {"pod2"}},
283+
PodSupplement: 0,
284+
},
285+
},
286+
{
287+
name: "terminal pod (Failed) triggers re-allocation",
288+
spec: &AllocSpec{
289+
Pods: []*corev1.Pod{
290+
// pod1 exists but is in Failed state — should not count as live.
291+
{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Status: corev1.PodStatus{Phase: corev1.PodFailed}},
292+
// pod2 is ready and available.
293+
{ObjectMeta: metav1.ObjectMeta{Name: "pod2"}, Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}},
294+
},
295+
Pool: &sandboxv1alpha1.Pool{ObjectMeta: metav1.ObjectMeta{Name: "pool1"}},
296+
Sandboxes: []*sandboxv1alpha1.BatchSandbox{
297+
{ObjectMeta: metav1.ObjectMeta{Name: "sbx1"}, Spec: sandboxv1alpha1.BatchSandboxSpec{Replicas: &replica1}},
298+
},
299+
},
300+
poolAlloc: &PoolAllocation{PodAllocation: map[string]string{"pod1": "sbx1"}},
301+
sandboxAllocs: map[string]*SandboxAllocation{"sbx1": {Pods: []string{"pod1"}}},
302+
releases: map[string]*AllocationRelease{"sbx1": {Pods: []string{}}},
303+
released: map[string]*AllocationReleased{"sbx1": {Pods: []string{}}},
304+
wantAction: &algorithm.AllocAction{
305+
ToAllocate: map[string][]string{"sbx1": {"pod2"}},
306+
ToRelease: map[string][]string{"sbx1": {"pod1"}},
307+
PodSupplement: 0,
308+
},
309+
},
262310
{
263311
name: "orphan sandbox - pods in store but sandbox no longer in spec",
264312
spec: &AllocSpec{

0 commit comments

Comments
 (0)