@@ -439,5 +439,165 @@ var _ = Describe("Eviction Controller", func() {
439439 })
440440 })
441441 })
442+
443+ Context ("Mixed VM Eviction" , func () {
444+ // serverTpl renders a single server response. The eviction controller
445+ // reads OS-EXT-SRV-ATTR:hypervisor_hostname (compared against the
446+ // short-form hypervisor name from spec) plus status/task_state/power_state.
447+ const serverTpl = `{
448+ "server": {
449+ "id": "%[1]s",
450+ "status": "%[2]s",
451+ "OS-EXT-SRV-ATTR:hypervisor_hostname": "%[3]s",
452+ "OS-EXT-STS:task_state": "%[4]s",
453+ "OS-EXT-STS:power_state": %[5]d,
454+ "fault": {"code": 500, "message": "%[6]s"}
455+ }
456+ }`
457+
458+ // migratedVMs is updated as the test simulates successful migrations.
459+ // When a VM has been "migrated", its hypervisor_hostname response
460+ // changes to a different host, signalling the controller it has left.
461+ var migratedVMs map [string ]bool
462+ var liveMigrateCalls map [string ]int
463+
464+ BeforeEach (func (ctx SpecContext ) {
465+ migratedVMs = map [string ]bool {}
466+ liveMigrateCalls = map [string ]int {}
467+
468+ By ("Seeding the eviction status with a list of VMs to evict" )
469+ // OutstandingInstances is processed from the END (peekInstance returns
470+ // last). With [good-1, error-1, good-2], processing order is:
471+ // 1) good-2 (last) - migrate, then drop
472+ // 2) error-1 (now last) - skipped via moveToBack
473+ // 3) good-1 - migrate, then drop
474+ // 4) error-1 (alone) - keeps erroring
475+ eviction := & kvmv1.Eviction {}
476+ Expect (k8sClient .Get (ctx , typeNamespacedName , eviction )).To (Succeed ())
477+ meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
478+ Type : kvmv1 .ConditionTypeEvicting ,
479+ Status : metav1 .ConditionTrue ,
480+ Message : "Running" ,
481+ Reason : kvmv1 .ConditionReasonRunning ,
482+ })
483+ meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
484+ Type : kvmv1 .ConditionTypePreflight ,
485+ Status : metav1 .ConditionTrue ,
486+ Message : "preflight passed" ,
487+ Reason : kvmv1 .ConditionReasonSucceeded ,
488+ })
489+ eviction .Status .HypervisorServiceId = serviceId
490+ eviction .Status .OutstandingInstances = []string {"good-1" , "error-1" , "good-2" }
491+ eviction .Status .OutstandingRamMb = 4096
492+ Expect (k8sClient .Status ().Update (ctx , eviction )).To (Succeed ())
493+
494+ By ("Mocking GET /servers/{id} to return per-VM state" )
495+ fakeServer .Mux .HandleFunc ("GET /servers/{server_id}" , func (w http.ResponseWriter , r * http.Request ) {
496+ serverID := r .PathValue ("server_id" )
497+ w .Header ().Add ("Content-Type" , "application/json" )
498+
499+ // hypervisor_hostname uses the FQDN-style name; the controller
500+ // only compares the short prefix (before the first ".") against
501+ // eviction.Spec.Hypervisor. After we mark a VM as migrated,
502+ // pretend it lives on a different host so the controller treats
503+ // it as "already moved" and removes it from the list.
504+ hvHost := hypervisorName + ".example.local"
505+ if migratedVMs [serverID ] {
506+ hvHost = "other-host.example.local"
507+ }
508+
509+ switch serverID {
510+ case "good-1" , "good-2" :
511+ status := "ACTIVE"
512+ if migratedVMs [serverID ] {
513+ // Once migrated, status doesn't really matter, but
514+ // keep it ACTIVE so we exercise the "different host"
515+ // branch rather than VERIFY_RESIZE.
516+ status = "ACTIVE"
517+ }
518+ w .WriteHeader (http .StatusOK )
519+ _ , err := fmt .Fprintf (w , serverTpl , serverID , status , hvHost , "" , 1 , "" )
520+ Expect (err ).NotTo (HaveOccurred ())
521+ case "error-1" :
522+ w .WriteHeader (http .StatusOK )
523+ _ , err := fmt .Fprintf (w , serverTpl , serverID , "ERROR" , hvHost , "" , 0 ,
524+ "manual intervention required" )
525+ Expect (err ).NotTo (HaveOccurred ())
526+ default :
527+ Fail ("unexpected server id: " + serverID )
528+ }
529+ })
530+
531+ By ("Mocking POST /servers/{id}/action for live-migration" )
532+ fakeServer .Mux .HandleFunc ("POST /servers/{server_id}/action" , func (w http.ResponseWriter , r * http.Request ) {
533+ serverID := r .PathValue ("server_id" )
534+ liveMigrateCalls [serverID ]++
535+ // Mark this VM as migrated so the next GET reports a different host.
536+ migratedVMs [serverID ] = true
537+ w .WriteHeader (http .StatusAccepted )
538+ })
539+ })
540+
541+ It ("skips errored VMs, evicts healthy ones, and retries errored VMs in subsequent loops" , func (ctx SpecContext ) {
542+ resource := & kvmv1.Eviction {}
543+
544+ // Reconcile loop until the list is empty or we've gone too long.
545+ // We expect: good-2 migrated, error-1 skipped, good-1 migrated, then
546+ // only error-1 remains and keeps erroring.
547+ By ("Running reconciliations until only the errored VM remains" )
548+ const maxLoops = 20
549+ for i := range maxLoops {
550+ // Tolerate errors here: when the controller hits the ERROR
551+ // VM it returns an error (joined with the status update).
552+ // That is part of the pattern under test, not a test failure.
553+ _ , reconcileErr := evictionReconciler .Reconcile (ctx , reconcileRequest )
554+ _ = reconcileErr // expected on ERROR-VM iterations
555+ Expect (k8sClient .Get (ctx , typeNamespacedName , resource )).To (Succeed ())
556+
557+ // Once both healthy VMs have been migrated and removed, we are
558+ // in the steady "only errored VM left" state.
559+ remaining := resource .Status .OutstandingInstances
560+ if len (remaining ) == 1 && remaining [0 ] == "error-1" {
561+ By (fmt .Sprintf ("Reached steady state after %d reconciliations" , i + 1 ))
562+ break
563+ }
564+ }
565+
566+ By ("Both healthy VMs were live-migrated exactly once" )
567+ Expect (liveMigrateCalls ["good-1" ]).To (Equal (1 ),
568+ "good-1 should have been migrated once" )
569+ Expect (liveMigrateCalls ["good-2" ]).To (Equal (1 ),
570+ "good-2 should have been migrated once" )
571+
572+ By ("The errored VM is still outstanding and never received a migrate call" )
573+ Expect (resource .Status .OutstandingInstances ).To (Equal ([]string {"error-1" }))
574+ Expect (liveMigrateCalls ).NotTo (HaveKey ("error-1" ))
575+
576+ By ("The migration condition reflects the most recent failure" )
577+ Expect (resource .Status .Conditions ).To (ContainElement (SatisfyAll (
578+ HaveField ("Type" , kvmv1 .ConditionTypeMigration ),
579+ HaveField ("Status" , metav1 .ConditionFalse ),
580+ HaveField ("Reason" , kvmv1 .ConditionReasonFailed ),
581+ HaveField ("Message" , ContainSubstring ("error-1" )),
582+ )))
583+
584+ By ("The eviction is NOT marked successful while the errored VM remains" )
585+ Expect (resource .Status .Conditions ).NotTo (ContainElement (SatisfyAll (
586+ HaveField ("Type" , kvmv1 .ConditionTypeEvicting ),
587+ HaveField ("Status" , metav1 .ConditionFalse ),
588+ HaveField ("Reason" , kvmv1 .ConditionReasonSucceeded ),
589+ )))
590+
591+ By ("Subsequent reconciliations keep retrying the errored VM (and surfacing the error)" )
592+ _ , err := evictionReconciler .Reconcile (ctx , reconcileRequest )
593+ // The controller returns an error when it encounters a VM in ERROR
594+ // state. The reconcile error should mention the errored UUID.
595+ if err != nil {
596+ Expect (err .Error ()).To (ContainSubstring ("error-1" ))
597+ }
598+ Expect (k8sClient .Get (ctx , typeNamespacedName , resource )).To (Succeed ())
599+ Expect (resource .Status .OutstandingInstances ).To (Equal ([]string {"error-1" }))
600+ })
601+ })
442602 })
443603})
0 commit comments