Skip to content

Commit 31d9028

Browse files
committed
fix(api, vmop): fix migration reason and progress logic
- BuildRecord now reads AutoConverge from MigrationConfiguration.AllowAutoConverge instead of vmop.Spec.Force; the resolveAutoConverge helper is removed.
- isAtMaxThrottle: when AutoConverge is disabled, the migration is always treated as being at max throttle (safe mode); when AutoConverge is enabled, it is at max throttle once throttle >= 0.99.
- Add live TargetDiskError detection via target pod events (FailedAttachVolume/FailedMount).
- Preserve the NotConverging terminal reason when the migration fails with a generic reason.
- Add unit tests for IsNotConverging and BuildRecord AutoConverge, and integration tests for TargetPreparing, TargetResumed, SourceSuspended, NotConverging persistence, and TargetDiskError live detection.

Signed-off-by: Daniil Antoshin <daniil.antoshin@flant.com>
1 parent 19f48f9 commit 31d9028

5 files changed

Lines changed: 552 additions & 18 deletions

File tree

images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
corev1 "k8s.io/api/core/v1"
2626
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
"k8s.io/apimachinery/pkg/fields"
2728
"k8s.io/apimachinery/pkg/types"
2829
"k8s.io/utils/ptr"
2930
virtv1 "kubevirt.io/api/core/v1"
@@ -63,6 +64,11 @@ const (
6364
messageSourceVMSuspended = "Source VM suspended"
6465
)
6566

67+
// Event reasons that getTargetPodDiskError matches against the Reason field of
// Warning events recorded for the target pod.
const (
68+
// reasonFailedAttachVolume is the event reason treated as a volume attach failure.
reasonFailedAttachVolume = "FailedAttachVolume"
69+
// reasonFailedMount is the event reason treated as a volume mount failure.
reasonFailedMount = "FailedMount"
70+
)
71+
6672
type Base interface {
6773
Init(vmop *v1alpha2.VirtualMachineOperation)
6874
ShouldExecuteOrSetFailedPhase(ctx context.Context, vmop *v1alpha2.VirtualMachineOperation) (bool, error)
@@ -290,6 +296,13 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp
290296
h.recorder.Event(vmop, corev1.EventTypeWarning, v1alpha2.ReasonErrVMOPFailed, "VirtualMachineOperation failed")
291297

292298
reason := h.getFailedReason(mig)
299+
if reason == vmopcondition.ReasonFailed {
300+
if prev, found := conditions.GetCondition(vmopcondition.TypeCompleted, vmop.Status.Conditions); found {
301+
if prev.Reason == vmopcondition.ReasonNotConverging.String() {
302+
reason = vmopcondition.ReasonNotConverging
303+
}
304+
}
305+
}
293306
msg := h.getFailedMessage(reason, mig)
294307
progress := h.calculateMigrationProgress(vmop, mig, reason)
295308
vmop.Status.Progress = ptr.To(progress)
@@ -318,10 +331,8 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp
318331
return err
319332
}
320333

321-
autoConverge := h.resolveAutoConverge(vmop)
322-
323334
if reason == vmopcondition.ReasonSyncing {
324-
record := migrationprogress.BuildRecord(vmop, mig, autoConverge, time.Now())
335+
record := migrationprogress.BuildRecord(vmop, mig, time.Now())
325336
if h.progressStrategy != nil && h.progressStrategy.IsNotConverging(record) {
326337
reason = vmopcondition.ReasonNotConverging
327338
msg = "Migration is not converging: data remaining is not decreasing at maximum throttle"
@@ -581,6 +592,9 @@ func (h LifecycleHandler) getInProgressReasonAndMessage(
581592
if isPodPendingUnschedulable(pod) {
582593
return vmopcondition.ReasonTargetUnschedulable, fmt.Sprintf("Target pod %q is unschedulable", pod.Namespace+"/"+pod.Name), nil
583594
}
595+
if diskErrMsg, hasDiskErr := h.getTargetPodDiskError(ctx, pod); hasDiskErr {
596+
return vmopcondition.ReasonTargetDiskError, fmt.Sprintf("Target pod has disk attach error: %s", diskErrMsg), nil
597+
}
584598

585599
if mig.Status.MigrationState != nil {
586600
state := mig.Status.MigrationState
@@ -614,7 +628,7 @@ func (h LifecycleHandler) calculateMigrationProgress(
614628
case vmopcondition.ReasonTargetDiskError:
615629
return progressTargetPreparing
616630
case vmopcondition.ReasonSyncing, vmopcondition.ReasonNotConverging:
617-
record := migrationprogress.BuildRecord(vmop, mig, h.resolveAutoConverge(vmop), time.Now())
631+
record := migrationprogress.BuildRecord(vmop, mig, time.Now())
618632
return h.progressStrategy.SyncProgress(record)
619633
case vmopcondition.ReasonSourceSuspended:
620634
h.forgetProgress(vmop)
@@ -634,14 +648,39 @@ func (h LifecycleHandler) calculateMigrationProgress(
634648
}
635649
}
636650

637-
func (h LifecycleHandler) resolveAutoConverge(vmop *v1alpha2.VirtualMachineOperation) bool {
638-
if vmop == nil {
639-
return false
651+
func (h LifecycleHandler) getTargetPodDiskError(ctx context.Context, pod *corev1.Pod) (string, bool) {
652+
if pod == nil {
653+
return "", false
640654
}
641-
if vmop.Spec.Force != nil && *vmop.Spec.Force {
642-
return true
655+
656+
for _, cs := range pod.Status.InitContainerStatuses {
657+
if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" {
658+
break
659+
}
643660
}
644-
return false
661+
for _, cs := range pod.Status.ContainerStatuses {
662+
if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" {
663+
eventList := &corev1.EventList{}
664+
err := h.client.List(ctx, eventList, &client.ListOptions{
665+
Namespace: pod.Namespace,
666+
FieldSelector: fields.SelectorFromSet(fields.Set{
667+
"involvedObject.name": pod.Name,
668+
"involvedObject.kind": "Pod",
669+
}),
670+
})
671+
if err != nil {
672+
return "", false
673+
}
674+
for _, e := range eventList.Items {
675+
if e.Type == corev1.EventTypeWarning && (e.Reason == reasonFailedAttachVolume || e.Reason == reasonFailedMount) {
676+
return fmt.Sprintf("%s: %s", e.Reason, e.Message), true
677+
}
678+
}
679+
return "", false
680+
}
681+
}
682+
683+
return "", false
645684
}
646685

647686
func (h LifecycleHandler) forgetProgress(vmop *v1alpha2.VirtualMachineOperation) {

0 commit comments

Comments
 (0)