Skip to content

Commit 253e04a

Browse files
feat: cleanup candidate reservations when confirming vm (#871)
When a VM is confirmed onto a CommittedResource reservation, immediately remove that VM's UUID from Spec.Allocations on all other candidate reservations that still carry it. Previously those slots were only freed after their grace period or the next periodic requeue, leaving transient phantom blocks on non-selected hosts.
1 parent 5e46b10 commit 253e04a

5 files changed

Lines changed: 349 additions & 5 deletions

File tree

internal/scheduling/reservations/commitments/committed_resource_controller_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,17 @@ func newCRTestClient(scheme *runtime.Scheme, objects ...client.Object) client.Cl
126126
}
127127
return []string{cr.Spec.ProjectID}
128128
}).
129+
WithIndex(&v1alpha1.Reservation{}, idxReservationByAllocationVMUUID, func(obj client.Object) []string {
130+
res, ok := obj.(*v1alpha1.Reservation)
131+
if !ok || res.Spec.CommittedResourceReservation == nil {
132+
return nil
133+
}
134+
uuids := make([]string, 0, len(res.Spec.CommittedResourceReservation.Allocations))
135+
for vmUUID := range res.Spec.CommittedResourceReservation.Allocations {
136+
uuids = append(uuids, vmUUID)
137+
}
138+
return uuids
139+
}).
129140
Build()
130141
}
131142

internal/scheduling/reservations/commitments/field_index.go

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,17 @@ const idxCommittedResourceByUUID = "spec.commitmentUUID"
1818
const idxCommittedResourceByProjectID = "spec.projectID"
1919
const idxReservationByCommitmentUUID = "spec.committedResourceReservation.commitmentUUID"
2020
const idxProjectQuotaByProjectID = "spec.projectID"
21+
const idxReservationByAllocationVMUUID = "spec.committedResourceReservation.allocations"
2122

2223
// once guards ensure each field index is registered exactly once.
2324
// Both CommittedResourceController and UsageReconciler call indexCommittedResourceByUUID;
2425
// the underlying cache returns "indexer conflict" on double registration.
2526
var (
26-
onceIndexCRByUUID sync.Once
27-
onceIndexCRByProjectID sync.Once
28-
onceIndexReservationByUUID sync.Once
29-
onceIndexPQByProjectID sync.Once
27+
onceIndexCRByUUID sync.Once
28+
onceIndexCRByProjectID sync.Once
29+
onceIndexReservationByUUID sync.Once
30+
onceIndexPQByProjectID sync.Once
31+
onceIndexReservationByAllocationVMID sync.Once
3032
)
3133

3234
// indexCommittedResourceByUUID registers the index used by UsageReconciler to look up
@@ -128,3 +130,34 @@ func indexProjectQuotaByProjectID(ctx context.Context, mcl *multicluster.Client)
128130
})
129131
return err
130132
}
133+
134+
// indexReservationByAllocationVMUUID registers an index over all VM UUIDs present in
135+
// Spec.CommittedResourceReservation.Allocations. This allows the reservation controller
136+
// to efficiently find all other Reservation CRDs carrying a specific VM UUID without
137+
// scanning every reservation in the cluster.
138+
func indexReservationByAllocationVMUUID(ctx context.Context, mcl *multicluster.Client) (err error) {
139+
onceIndexReservationByAllocationVMID.Do(func() {
140+
log := logf.FromContext(ctx)
141+
err = mcl.IndexField(ctx,
142+
&v1alpha1.Reservation{},
143+
&v1alpha1.ReservationList{},
144+
idxReservationByAllocationVMUUID,
145+
func(obj client.Object) []string {
146+
res, ok := obj.(*v1alpha1.Reservation)
147+
if !ok {
148+
log.Error(errors.New("unexpected type"), "expected Reservation", "object", obj)
149+
return nil
150+
}
151+
if res.Spec.CommittedResourceReservation == nil {
152+
return nil
153+
}
154+
uuids := make([]string, 0, len(res.Spec.CommittedResourceReservation.Allocations))
155+
for vmUUID := range res.Spec.CommittedResourceReservation.Allocations {
156+
uuids = append(uuids, vmUUID)
157+
}
158+
return uuids
159+
},
160+
)
161+
})
162+
return err
163+
}

internal/scheduling/reservations/commitments/integration_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,17 @@ func newIntgEnv(t *testing.T, initialObjects []client.Object, schedulerFn http.H
359359
}
360360
return []string{cr.Spec.CommitmentUUID}
361361
}).
362+
WithIndex(&v1alpha1.Reservation{}, idxReservationByAllocationVMUUID, func(obj client.Object) []string {
363+
res, ok := obj.(*v1alpha1.Reservation)
364+
if !ok || res.Spec.CommittedResourceReservation == nil {
365+
return nil
366+
}
367+
uuids := make([]string, 0, len(res.Spec.CommittedResourceReservation.Allocations))
368+
for vmUUID := range res.Spec.CommittedResourceReservation.Allocations {
369+
uuids = append(uuids, vmUUID)
370+
}
371+
return uuids
372+
}).
362373
Build()
363374

364375
schedulerSrv := httptest.NewServer(schedulerFn)

internal/scheduling/reservations/commitments/reservation_controller.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,13 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte
395395
res.Status.CommittedResourceReservation = &v1alpha1.CommittedResourceReservationStatus{}
396396
}
397397

398+
// Snapshot existing Status.Allocations before we overwrite it so we can detect
399+
// which VM UUIDs are newly confirmed after the patch.
400+
existingStatusAllocations := make(map[string]string, len(res.Status.CommittedResourceReservation.Allocations))
401+
for k, v := range res.Status.CommittedResourceReservation.Allocations {
402+
existingStatusAllocations[k] = v
403+
}
404+
398405
// Build new Status.Allocations map based on HV CRD state.
399406
newStatusAllocations := make(map[string]string)
400407
// Track allocations to remove from Spec (stale/leaving VMs).
@@ -469,6 +476,19 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte
469476
res.Status.CommittedResourceReservation.Allocations = newStatusAllocations
470477
}
471478

479+
// Proactively remove this VM UUID from all other candidate reservations that still
480+
// carry it in their Spec.Allocations. Only do this for VMs that are newly confirmed
481+
// in this reconcile cycle (present in newStatusAllocations but absent in the snapshot
482+
// taken before any patch) to avoid redundant work on subsequent reconciles.
483+
for vmUUID := range newStatusAllocations {
484+
if _, wasAlreadyConfirmed := existingStatusAllocations[vmUUID]; wasAlreadyConfirmed {
485+
continue
486+
}
487+
if err := r.cleanupCandidateReservations(ctx, res.Name, vmUUID); err != nil {
488+
return nil, fmt.Errorf("failed to cleanup candidate reservations for vm %s: %w", vmUUID, err)
489+
}
490+
}
491+
472492
// Patch Status
473493
patch := client.MergeFrom(old)
474494
if err := r.Status().Patch(ctx, res, patch); err != nil {
@@ -487,6 +507,41 @@ func (r *CommitmentReservationController) reconcileAllocations(ctx context.Conte
487507
return result, nil
488508
}
489509

510+
// cleanupCandidateReservations removes vmUUID from Spec.Allocations of all Reservation CRDs
511+
// other than the one that just confirmed the VM. This is called once per newly confirmed VM
512+
// so that candidate slots on non-selected hosts are freed immediately rather than waiting
513+
// for those reservations' own grace period or periodic requeue.
514+
func (r *CommitmentReservationController) cleanupCandidateReservations(ctx context.Context, confirmedReservationName, vmUUID string) error {
515+
logger := LoggerFromContext(ctx).WithValues("component", "controller", "vm", vmUUID)
516+
517+
var candidates v1alpha1.ReservationList
518+
if err := r.List(ctx, &candidates, client.MatchingFields{idxReservationByAllocationVMUUID: vmUUID}); err != nil {
519+
return fmt.Errorf("failed to list candidate reservations: %w", err)
520+
}
521+
522+
for i := range candidates.Items {
523+
candidate := &candidates.Items[i]
524+
if candidate.Name == confirmedReservationName {
525+
continue
526+
}
527+
if candidate.Spec.CommittedResourceReservation == nil {
528+
continue
529+
}
530+
if _, ok := candidate.Spec.CommittedResourceReservation.Allocations[vmUUID]; !ok {
531+
continue
532+
}
533+
old := candidate.DeepCopy()
534+
delete(candidate.Spec.CommittedResourceReservation.Allocations, vmUUID)
535+
if err := r.Patch(ctx, candidate, client.MergeFrom(old)); err != nil {
536+
if client.IgnoreNotFound(err) != nil {
537+
return fmt.Errorf("failed to patch candidate reservation %s: %w", candidate.Name, err)
538+
}
539+
}
540+
logger.Info("removed vm from candidate reservation", "reservation", candidate.Name, "host", candidate.Status.Host)
541+
}
542+
return nil
543+
}
544+
490545
// getPipelineForFlavorGroup returns the pipeline name for a given flavor group.
491546
func (r *CommitmentReservationController) getPipelineForFlavorGroup(flavorGroupName string, logger logr.Logger) string {
492547
// Try exact match first (e.g., "2152" -> "kvm-cr-hana")
@@ -581,6 +636,10 @@ func (r *CommitmentReservationController) SetupWithManager(mgr ctrl.Manager, mcl
581636
return err
582637
}
583638

639+
if err := indexReservationByAllocationVMUUID(context.Background(), mcl); err != nil {
640+
return fmt.Errorf("failed to set up reservation allocation VM UUID index: %w", err)
641+
}
642+
584643
// Use WatchesMulticluster to watch Reservations across all configured clusters
585644
// (home + remotes). This is required because Reservation CRDs may be stored
586645
// in remote clusters, not just the home cluster. Without this, the controller

0 commit comments

Comments
 (0)