Skip to content

Commit 757368b

Browse files
fix: subtract VM allocations when counting placeable slots (#891)
probeScheduler always used raw effectiveCapacity for slot counting, which made running VMs invisible in the usage calculation (usage = totalSlots - placeableSlots). The total probe correctly ignores allocations (empty-datacenter view), but the placeable probe must subtract hv.Status.Allocation so that each host's remaining capacity reflects the slots still available after running VMs are accounted for.
1 parent 67d34cd commit 757368b

2 files changed

Lines changed: 60 additions & 4 deletions

File tree

internal/scheduling/reservations/capacity/controller.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,8 @@ func (c *Controller) reconcileOne(
162162
cur := existingByName[flavor.Name]
163163
cur.FlavorName = flavor.Name
164164

165-
totalVMSlots, totalHosts, totalErr := c.probeScheduler(ctx, flavor, az, c.config.TotalPipeline, hvByName)
166-
placeableVMs, placeableHosts, placeableErr := c.probeScheduler(ctx, flavor, az, c.config.PlaceablePipeline, hvByName)
165+
totalVMSlots, totalHosts, totalErr := c.probeScheduler(ctx, flavor, az, c.config.TotalPipeline, hvByName, true)
166+
placeableVMs, placeableHosts, placeableErr := c.probeScheduler(ctx, flavor, az, c.config.PlaceablePipeline, hvByName, false)
167167

168168
if totalErr != nil {
169169
allFresh = false
@@ -257,11 +257,15 @@ func (c *Controller) reconcileOne(
257257

258258
// probeScheduler calls the scheduler with the given pipeline and returns VM slots + host count.
259259
// Capacity is computed as the sum of floor(usableHostMemory / flavorMemory) across the returned hosts.
260+
// When ignoreAllocations is true (total/empty-datacenter probe), raw effective capacity is used.
261+
// When false (placeable probe), hv.Status.Allocation is subtracted first so that slots reflect
262+
// remaining memory capacity after running VMs; a negative remainder is clamped to zero.
260263
func (c *Controller) probeScheduler(
261264
ctx context.Context,
262265
flavor compute.FlavorInGroup,
263266
az, pipeline string,
264267
hvByName map[string]hv1.Hypervisor,
268+
ignoreAllocations bool,
265269
) (capacity, hosts int64, err error) {
266270

267271
flavorBytes := int64(flavor.MemoryMB) * 1024 * 1024 //nolint:gosec
@@ -309,7 +313,16 @@ func (c *Controller) probeScheduler(
309313
if !ok {
310314
continue
311315
}
312-
if capBytes := memCap.Value(); capBytes > 0 {
316+
capBytes := memCap.Value()
317+
if !ignoreAllocations {
318+
if alloc, ok := hv.Status.Allocation[hv1.ResourceMemory]; ok {
319+
capBytes -= alloc.Value()
320+
}
321+
if capBytes < 0 {
322+
capBytes = 0
323+
}
324+
}
325+
if capBytes > 0 {
313326
capacity += capBytes / flavorBytes
314327
}
315328
}

internal/scheduling/reservations/capacity/controller_test.go

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ func TestProbeScheduler_CapacityCalculation(t *testing.T) {
429429
}
430430
flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB}
431431

432-
capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName)
432+
capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName, true)
433433
if err != nil {
434434
t.Fatalf("probeScheduler failed: %v", err)
435435
}
@@ -442,6 +442,49 @@ func TestProbeScheduler_CapacityCalculation(t *testing.T) {
442442
}
443443
}
444444

445+
// TestProbeScheduler_SubtractsAllocationsWhenNotIgnored verifies that placeable-probe slot
446+
// counting uses remaining capacity (effectiveCapacity − allocation) while the total-probe uses
447+
// raw capacity. This is the regression test for the bug where both probes used raw capacity,
448+
// making running VMs invisible in the usage = total − placeable calculation.
449+
func TestProbeScheduler_SubtractsAllocationsWhenNotIgnored(t *testing.T) {
450+
const memMB = 4096
451+
const memBytes = int64(memMB) * 1024 * 1024
452+
453+
scheme := newTestScheme(t)
454+
455+
// Host has 2-slot capacity (2 × flavor), with 1 slot already used by a running VM.
456+
hv := newHypervisor("host-1", "az-a", memBytes*2)
457+
hv.Status.Allocation = map[hv1.ResourceName]resource.Quantity{
458+
hv1.ResourceMemory: *resource.NewQuantity(memBytes, resource.BinarySI),
459+
}
460+
461+
fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build()
462+
srv := newMockSchedulerServer(t, []string{"host-1"})
463+
defer srv.Close()
464+
465+
c := NewController(fakeClient, Config{SchedulerURL: srv.URL})
466+
hvByName := map[string]hv1.Hypervisor{"host-1": *hv}
467+
flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB}
468+
469+
// Total probe (ignoreAllocations=true): raw capacity → 2 slots.
470+
totalCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "total-pipeline", hvByName, true)
471+
if err != nil {
472+
t.Fatalf("probeScheduler (total) failed: %v", err)
473+
}
474+
if totalCap != 2 {
475+
t.Errorf("total capacity = %d, want 2 (raw slots)", totalCap)
476+
}
477+
478+
// Placeable probe (ignoreAllocations=false): capacity − allocation → 1 slot.
479+
placeableCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "placeable-pipeline", hvByName, false)
480+
if err != nil {
481+
t.Fatalf("probeScheduler (placeable) failed: %v", err)
482+
}
483+
if placeableCap != 1 {
484+
t.Errorf("placeable capacity = %d, want 1 (remaining slot after running VM)", placeableCap)
485+
}
486+
}
487+
445488
func TestReconcileAll_MultipleGroupsAndAZs(t *testing.T) {
446489
scheme := newTestScheme(t)
447490

0 commit comments

Comments
 (0)