Skip to content

Commit 6daa505

Browse files
authored
feat(cr): pre-allocate PAYG VMs into CR reservation slots on cr creation/modification (#951)
When a new committed resource is created, the controller now absorbs existing PAYG VMs into reservation slots instead of blindly probing the scheduler for every slot.
1 parent daae898 commit 6daa505

10 files changed

Lines changed: 1228 additions & 48 deletions

File tree

cmd/manager/main.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -604,10 +604,11 @@ func main() {
604604
metrics.Registry.MustRegister(&crControllerMonitor)
605605

606606
if err := (&commitments.CommittedResourceController{
607-
Client: multiclusterClient,
608-
Scheme: mgr.GetScheme(),
609-
Conf: crControllerConf,
610-
Monitor: &crControllerMonitor,
607+
Client: multiclusterClient,
608+
Scheme: mgr.GetScheme(),
609+
Conf: crControllerConf,
610+
Monitor: &crControllerMonitor,
611+
VMSource: commitmentsVMSource,
611612
}).SetupWithManager(mgr, multiclusterClient); err != nil {
612613
setupLog.Error(err, "unable to create controller", "controller", "CommittedResource")
613614
os.Exit(1)

helm/bundles/cortex-nova/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,9 @@ cortex-scheduling-controllers:
197197
slotCreationDelay: "0ms"
198198
# Max Reservation CRDs per CommittedResource on the API path; 0 disables the limit
199199
maxSlotsPerCommitment: 0
200+
# When true, the controller scans the AZ for existing PAYG VMs and pre-allocates them
201+
# into reservation slots before falling back to blind scheduler placement.
202+
enablePaygPreAllocation: false
200203
committedResourceAPI:
201204
# Timeout for watching CommittedResource CRDs before rolling back
202205
watchTimeout: "15s"

internal/scheduling/reservations/commitments/committed_resource_controller.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ type CommittedResourceController struct {
3838
Scheme *runtime.Scheme
3939
Conf CommittedResourceControllerConfig
4040
Monitor *CRControllerMonitor
41+
// VMSource enables PAYG pre-allocation when creating reservation slots. When nil the
42+
// PAYG scan is skipped and all slots are created via blind scheduler probes.
43+
VMSource reservations.VMSource
4144
}
4245

4346
func (r *CommittedResourceController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
@@ -315,11 +318,16 @@ func (r *CommittedResourceController) applyReservationState(ctx context.Context,
315318
state.CreatorRequestID = reservations.GlobalRequestIDFromContext(ctx)
316319
state.ParentGeneration = cr.Generation
317320

318-
mgr := NewReservationManager(r.Client)
319-
mgr.SlotCreationDelay = r.Conf.SlotCreationDelay.Duration
321+
maxSlots := 0
320322
if cr.Spec.AllowRejection {
321-
mgr.MaxSlots = r.Conf.MaxSlotsPerCommitment
323+
maxSlots = r.Conf.MaxSlotsPerCommitment
322324
}
325+
mgr := NewReservationManager(r.Client, ReservationManagerConfig{
326+
SlotCreationDelay: r.Conf.SlotCreationDelay.Duration,
327+
MaxSlots: maxSlots,
328+
EnablePaygPreAllocation: r.Conf.EnablePaygPreAllocation,
329+
VMSource: r.VMSource,
330+
})
323331
result, err := mgr.ApplyCommitmentState(ctx, logger, state, flavorGroups, "committed-resource-controller")
324332
if err != nil {
325333
var limitErr *SlotLimitExceededError
@@ -470,7 +478,11 @@ func (r *CommittedResourceController) rollbackToAccepted(ctx context.Context, lo
470478
state.NamePrefix = cr.Name + "-"
471479
state.CreatorRequestID = reservations.GlobalRequestIDFromContext(ctx)
472480
state.ParentGeneration = cr.Generation
473-
if _, err := NewReservationManager(r.Client).ApplyCommitmentState(ctx, logger, state, flavorGroups, "committed-resource-controller-rollback"); err != nil {
481+
rollbackMgr := NewReservationManager(r.Client, ReservationManagerConfig{
482+
EnablePaygPreAllocation: r.Conf.EnablePaygPreAllocation,
483+
VMSource: r.VMSource,
484+
})
485+
if _, err := rollbackMgr.ApplyCommitmentState(ctx, logger, state, flavorGroups, "committed-resource-controller-rollback"); err != nil {
474486
return fmt.Errorf("rollback apply failed: %w", err)
475487
}
476488
return nil

internal/scheduling/reservations/commitments/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,11 @@ type CommittedResourceControllerConfig struct {
102102
// Has no effect on the AllowRejection=false (syncer) path.
103103
// 0 disables the cap.
104104
MaxSlotsPerCommitment int `json:"maxSlotsPerCommitment"`
105+
106+
// EnablePaygPreAllocation enables scanning the AZ for existing PAYG VMs before creating
107+
// blind reservation slots. When true, the controller absorbs matching PAYG VMs into
108+
// pre-populated slots, consuming CR delta before falling back to the blind scheduler path.
109+
EnablePaygPreAllocation bool `json:"enablePaygPreAllocation,omitempty"`
105110
}
106111

107112
// ResourceTypeConfig holds per-resource flags for a single resource type within a flavor group.

internal/scheduling/reservations/commitments/integration_test.go

Lines changed: 105 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929

3030
schedulerdelegationapi "github.com/cobaltcore-dev/cortex/api/external/nova"
3131
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
32+
"github.com/cobaltcore-dev/cortex/internal/scheduling/reservations"
3233
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
3334
"k8s.io/apimachinery/pkg/api/meta"
3435
"k8s.io/apimachinery/pkg/api/resource"
@@ -55,6 +56,9 @@ type CRIntegrationTestCase struct {
5556
// CRs to create and drive to terminal state.
5657
CommittedResources []*v1alpha1.CommittedResource
5758

59+
// When set, the CR controller is given a VMSource for PAYG pre-allocation.
60+
VMSource reservations.VMSource
61+
5862
// When true the mock scheduler returns an empty hosts list (NoHostsFound).
5963
SchedulerRejects bool
6064
// SchedulerAcceptFirst, when > 0, makes the mock scheduler accept only the first N
@@ -63,12 +67,13 @@ type CRIntegrationTestCase struct {
6367
SchedulerAcceptFirst int
6468

6569
// Expected state after all CRs reach a terminal condition.
66-
ExpectedSlots int // total Reservation CRDs remaining in the store
67-
AcceptedCRs []string // CRs expected Ready=True / Accepted
68-
RejectedCRs []string // CRs expected Ready=False / Rejected
69-
PlannedCRs []string // CRs expected Ready=False / Planned
70-
ExpiredCRs []string // CRs expected Ready=False / Expired
71-
SupersededCRs []string // CRs expected Ready=False / Superseded
70+
ExpectedSlots int // total Reservation CRDs remaining in the store
71+
AcceptedCRs []string // CRs expected Ready=True / Accepted
72+
RejectedCRs []string // CRs expected Ready=False / Rejected
73+
PlannedCRs []string // CRs expected Ready=False / Planned
74+
ExpiredCRs []string // CRs expected Ready=False / Expired
75+
SupersededCRs []string // CRs expected Ready=False / Superseded
76+
ValidateReservations func(t *testing.T, slots []v1alpha1.Reservation) // optional extra assertions
7277
}
7378

7479
func TestCRIntegration(t *testing.T) {
@@ -258,6 +263,70 @@ func TestCRIntegration(t *testing.T) {
258263
ExpectedSlots: 0,
259264
RejectedCRs: []string{"cr-partial"},
260265
},
266+
// ------------------------------------------------------------------
267+
// PAYG pre-allocation
268+
// ------------------------------------------------------------------
269+
{
270+
// PAYG VM on host-1 matches the CR project + flavor group.
271+
// CR controller pre-allocates the slot; reservation controller marks it
272+
// Ready via the PreAllocated fast-path without calling the scheduler.
273+
Name: "PAYG VM present: slot pre-allocated on HV, no scheduler call needed",
274+
Hypervisors: []*hv1.Hypervisor{
275+
intgHypervisorWithAZ("host-1", "test-az", "vm-payg-1"),
276+
},
277+
VMSource: &fakeVMSource{vms: []reservations.VM{{
278+
UUID: "vm-payg-1",
279+
FlavorName: "test-flavor",
280+
CurrentHypervisor: "host-1",
281+
}}},
282+
CommittedResources: []*v1alpha1.CommittedResource{
283+
intgCR("cr-payg", "uuid-intg-payg-1", v1alpha1.CommitmentStatusConfirmed),
284+
},
285+
SchedulerRejects: true, // scheduler would reject if called — proves it isn't
286+
ExpectedSlots: 1,
287+
AcceptedCRs: []string{"cr-payg"},
288+
ValidateReservations: func(t *testing.T, slots []v1alpha1.Reservation) {
289+
t.Helper()
290+
if len(slots) != 1 {
291+
t.Fatalf("want 1 slot, got %d", len(slots))
292+
}
293+
res := slots[0]
294+
if res.Spec.TargetHost != "host-1" {
295+
t.Errorf("TargetHost: want host-1, got %q", res.Spec.TargetHost)
296+
}
297+
if res.Spec.CommittedResourceReservation == nil {
298+
t.Fatal("CommittedResourceReservation is nil")
299+
}
300+
if _, ok := res.Spec.CommittedResourceReservation.Allocations["vm-payg-1"]; !ok {
301+
t.Error("expected vm-payg-1 in Spec.Allocations")
302+
}
303+
},
304+
},
305+
{
306+
// No PAYG VMs → falls through to the scheduler. Scheduler accepts → CR accepted.
307+
Name: "no PAYG VMs: falls back to scheduler, CR accepted normally",
308+
Hypervisors: []*hv1.Hypervisor{
309+
intgHypervisorWithAZ("host-1", "test-az"),
310+
},
311+
VMSource: &fakeVMSource{vms: nil},
312+
CommittedResources: []*v1alpha1.CommittedResource{
313+
intgCR("cr-nopayg", "uuid-intg-payg-2", v1alpha1.CommitmentStatusConfirmed),
314+
},
315+
ExpectedSlots: 1,
316+
AcceptedCRs: []string{"cr-nopayg"},
317+
ValidateReservations: func(t *testing.T, slots []v1alpha1.Reservation) {
318+
t.Helper()
319+
if len(slots) != 1 {
320+
t.Fatalf("want 1 slot, got %d", len(slots))
321+
}
322+
if slots[0].Spec.TargetHost == "" {
323+
t.Error("expected TargetHost set by scheduler (Phase 5 path)")
324+
}
325+
if len(slots[0].Spec.CommittedResourceReservation.Allocations) != 0 {
326+
t.Error("expected no pre-allocations on scheduler-placed slot")
327+
}
328+
},
329+
},
261330
}
262331

263332
for _, tc := range testCases {
@@ -290,7 +359,7 @@ func runCRIntegrationTestCase(t *testing.T, tc CRIntegrationTestCase) {
290359
objects = append(objects, res)
291360
}
292361

293-
env := newIntgEnv(t, objects, schedulerFn)
362+
env := newIntgEnv(t, objects, schedulerFn, tc.VMSource)
294363
defer env.close()
295364

296365
crNames := make([]string, len(tc.CommittedResources))
@@ -320,6 +389,10 @@ func runCRIntegrationTestCase(t *testing.T, tc CRIntegrationTestCase) {
320389
intgAssertCRCondition(t, env.k8sClient, tc.PlannedCRs, metav1.ConditionFalse, v1alpha1.CommittedResourceReasonPlanned)
321390
intgAssertCRCondition(t, env.k8sClient, tc.ExpiredCRs, metav1.ConditionFalse, string(v1alpha1.CommitmentStatusExpired))
322391
intgAssertCRCondition(t, env.k8sClient, tc.SupersededCRs, metav1.ConditionFalse, string(v1alpha1.CommitmentStatusSuperseded))
392+
393+
if tc.ValidateReservations != nil {
394+
tc.ValidateReservations(t, resList.Items)
395+
}
323396
}
324397

325398
// ============================================================================
@@ -333,7 +406,7 @@ type intgEnv struct {
333406
schedulerSrv *httptest.Server
334407
}
335408

336-
func newIntgEnv(t *testing.T, initialObjects []client.Object, schedulerFn http.HandlerFunc) *intgEnv {
409+
func newIntgEnv(t *testing.T, initialObjects []client.Object, schedulerFn http.HandlerFunc, vmSource reservations.VMSource) *intgEnv {
337410
t.Helper()
338411
scheme := newCRTestScheme(t)
339412

@@ -377,7 +450,11 @@ func newIntgEnv(t *testing.T, initialObjects []client.Object, schedulerFn http.H
377450
crCtrl := &CommittedResourceController{
378451
Client: k8sClient,
379452
Scheme: scheme,
380-
Conf: CommittedResourceControllerConfig{RequeueIntervalRetry: metav1.Duration{Duration: 5 * time.Minute}},
453+
Conf: CommittedResourceControllerConfig{
454+
RequeueIntervalRetry: metav1.Duration{Duration: 5 * time.Minute},
455+
EnablePaygPreAllocation: vmSource != nil,
456+
},
457+
VMSource: vmSource,
381458
}
382459
resCtrl := &CommitmentReservationController{
383460
Client: k8sClient,
@@ -399,7 +476,7 @@ func (e *intgEnv) close() { e.schedulerSrv.Close() }
399476
func newDefaultIntgEnv(t *testing.T) *intgEnv {
400477
t.Helper()
401478
objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}
402-
return newIntgEnv(t, objects, intgAcceptScheduler)
479+
return newIntgEnv(t, objects, intgAcceptScheduler, nil)
403480
}
404481

405482
func (e *intgEnv) reconcileCR(t *testing.T, crName string) {
@@ -630,6 +707,21 @@ func intgHypervisor(name string) *hv1.Hypervisor {
630707
return &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: name}}
631708
}
632709

710+
// intgHypervisorWithAZ returns a Hypervisor in the given AZ with optional active instances.
711+
func intgHypervisorWithAZ(name, az string, instanceIDs ...string) *hv1.Hypervisor {
712+
instances := make([]hv1.Instance, len(instanceIDs))
713+
for i, id := range instanceIDs {
714+
instances[i] = hv1.Instance{ID: id, Name: id, Active: true}
715+
}
716+
return &hv1.Hypervisor{
717+
ObjectMeta: metav1.ObjectMeta{
718+
Name: name,
719+
Labels: map[string]string{"topology.kubernetes.io/zone": az},
720+
},
721+
Status: hv1.HypervisorStatus{Instances: instances},
722+
}
723+
}
724+
633725
// intgCR returns a CommittedResource with the default 4 GiB amount.
634726
// commitmentUUID must be unique per test case to avoid field-index collisions.
635727
func intgCR(name, commitmentUUID string, state v1alpha1.CommitmentStatus) *v1alpha1.CommittedResource {
@@ -886,7 +978,7 @@ func TestCRLifecycle(t *testing.T) {
886978
})
887979

888980
t.Run("AllowRejection=false: stays Reserving when scheduler rejects", func(t *testing.T) {
889-
env := newIntgEnv(t, []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}, intgRejectScheduler)
981+
env := newIntgEnv(t, []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}, intgRejectScheduler, nil)
890982
defer env.close()
891983

892984
cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)
@@ -995,7 +1087,7 @@ func TestCRLifecycle(t *testing.T) {
9951087
t.Run("resize failure: rolls back to AcceptedSpec, prior slot preserved", func(t *testing.T) {
9961088
// Scheduler: accepts the first placement call (initial 4 GiB slot), rejects all subsequent.
9971089
objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}
998-
env := newIntgEnv(t, objects, intgAcceptFirstScheduler(1))
1090+
env := newIntgEnv(t, objects, intgAcceptFirstScheduler(1), nil)
9991091
defer env.close()
10001092

10011093
cr := intgCRAllowRejection("my-cr", "uuid-resize-0001", v1alpha1.CommitmentStatusConfirmed)
@@ -1060,7 +1152,7 @@ func TestCRLifecycle(t *testing.T) {
10601152
// then accepts all subsequent. AllowRejection=false means the CR controller retries rather
10611153
// than rejecting, so the CR must eventually reach Accepted once the scheduler cooperates.
10621154
objects := []client.Object{newTestFlavorKnowledge(), intgHypervisor("host-1")}
1063-
env := newIntgEnv(t, objects, intgRejectFirstScheduler(2))
1155+
env := newIntgEnv(t, objects, intgRejectFirstScheduler(2), nil)
10641156
defer env.close()
10651157

10661158
cr := newTestCommittedResource("my-cr", v1alpha1.CommitmentStatusConfirmed)

0 commit comments

Comments
 (0)