diff --git a/go.mod b/go.mod index 4e0432734..2ed3fc951 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,7 @@ require ( github.com/vmware-tanzu/image-registry-operator-api v0.0.0-20250813160346-0f6259af5cbb github.com/vmware-tanzu/net-operator-api v0.0.0-20260501221253-4950cf50cd6b github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20260423081355-beab2417344a - github.com/vmware/govmomi v0.54.0 + github.com/vmware/govmomi v0.55.0-alpha.0.0.20260518191903-48ab34adb211 golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 golang.org/x/net v0.53.0 // indirect // * https://github.com/vmware-tanzu/vm-operator/security/dependabot/24 diff --git a/go.sum b/go.sum index 5926587c2..7f1a7e66c 100644 --- a/go.sum +++ b/go.sum @@ -160,8 +160,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20260501221253-4950cf50cd6b h1:C github.com/vmware-tanzu/net-operator-api v0.0.0-20260501221253-4950cf50cd6b/go.mod h1:w6QJGm3crIA16ZIz1FVQXD2NVeJhOgGXxW05RbVTSTo= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20260423081355-beab2417344a h1:yqGxhqSJ78veQjdOHINJLE9IWDcreMTzwDsOAdwrUWM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20260423081355-beab2417344a/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware/govmomi v0.54.0 h1:akEKkM9XKMOhTskmdzTLG8JzH+sh61jbFrVPbAzv5IQ= -github.com/vmware/govmomi v0.54.0/go.mod h1:0F3hChqXDrSQQnjfSiCqRE5lPD4aZlbOtKG4uroq2a4= +github.com/vmware/govmomi v0.55.0-alpha.0.0.20260518191903-48ab34adb211 h1:n8hoHi/26x5GaTKTS04PqC7bNrCh7Wa7Eh44RKTM214= +github.com/vmware/govmomi v0.55.0-alpha.0.0.20260518191903-48ab34adb211/go.mod h1:0F3hChqXDrSQQnjfSiCqRE5lPD4aZlbOtKG4uroq2a4= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= diff --git a/pkg/vmconfig/diskpromo/diskpromo_reconciler.go b/pkg/vmconfig/diskpromo/diskpromo_reconciler.go index 1711f46b3..ce92c6cce 100644 --- a/pkg/vmconfig/diskpromo/diskpromo_reconciler.go +++ b/pkg/vmconfig/diskpromo/diskpromo_reconciler.go @@ -8,6 +8,7 @@ import ( "context" "fmt" + "github.com/vmware/govmomi/fault" "github.com/vmware/govmomi/object" "github.com/vmware/govmomi/vim25" "github.com/vmware/govmomi/vim25/mo" @@ -29,9 +30,10 @@ type reconciler struct{} var _ vmconfig.Reconciler = reconciler{} const ( - ReasonTaskError = "DiskPromotionTaskError" - ReasonPending = "DiskPromotionPending" - ReasonRunning = "DiskPromotionRunning" + ReasonTaskError = "DiskPromotionTaskError" + ReasonTaskTransientError = "DiskPromotionTaskTransientError" + ReasonPending = "DiskPromotionPending" + ReasonRunning = "DiskPromotionRunning" PromoteDisksTaskKey = "VirtualMachine.promoteDisks" ) @@ -124,6 +126,37 @@ func (r reconciler) Reconcile( switch t.State { case vimtypes.TaskInfoStateError: + // A transient fault (e.g. ConcurrentAccess) means a competing + // vSphere operation was in flight when this task was attempted. + // The fault is self-resolving once that operation finishes — + // no operator action is required. + // + // Continuing the loop instead of returning allows the next + // reconcile to issue a fresh PromoteDisks task without waiting + // for the errored task to expire from RecentTask (~10 minutes). + // Two invariants ensure this does not create duplicate tasks: + // + // 1. If a PromoteDisks task is still running, the loop + // returns early above before reaching obj.PromoteDisks. + // 2. If the competing operation is still running, the + // runningTaskInfo guard below blocks obj.PromoteDisks + // until it completes. + // + // Ordering in RecentTask is also not a concern. All transient- + // errored tasks are skipped regardless of how many exist or + // where they appear. Any running or successful PromoteDisks + // task encountered anywhere in the loop still returns early + // and takes precedence. + if fault.IsTransientError(t.Error) { + pkgcond.MarkFalse( + vm, + vmopv1.VirtualMachineDiskPromotionSynced, + ReasonTaskTransientError, + "%s", + t.Error.LocalizedMessage) + continue + } + pkgcond.MarkFalse( vm, vmopv1.VirtualMachineDiskPromotionSynced, diff --git a/pkg/vmconfig/diskpromo/diskpromo_reconciler_test.go b/pkg/vmconfig/diskpromo/diskpromo_reconciler_test.go index 34b4ef19e..0d0bf0944 100644 --- a/pkg/vmconfig/diskpromo/diskpromo_reconciler_test.go +++ b/pkg/vmconfig/diskpromo/diskpromo_reconciler_test.go @@ -476,26 +476,106 @@ var _ = Describe("Reconcile", Label(testlabels.V1Alpha5), func() { }) }) - When("there promote disks is called while already running", func() { + When("there promote disks is called while already running", func() { + BeforeEach(func() { + ctx = pkgctx.WithVMRecentTasks(ctx, []vimtypes.TaskInfo{ + { + State: vimtypes.TaskInfoStateRunning, + DescriptionId: diskpromo.PromoteDisksTaskKey, + }, + }) + }) + It("should mark the condition as running", func() { + Expect(err).ToNot(HaveOccurred()) + c := conditions.Get(vm, vmopv1.VirtualMachineDiskPromotionSynced) + Expect(c).ToNot(BeNil()) + Expect(c.Status).To(Equal(metav1.ConditionFalse)) + Expect(c.Reason).To(Equal(diskpromo.ReasonRunning)) + Expect(c.Message).To(Equal("Promotion is running")) + }) + }) + + When("a previous promote disks task failed with a transient error", func() { + transientTaskInfo := vimtypes.TaskInfo{ + State: vimtypes.TaskInfoStateError, + DescriptionId: diskpromo.PromoteDisksTaskKey, + Error: &vimtypes.LocalizedMethodFault{ + Fault: &vimtypes.ConcurrentAccess{}, + LocalizedMessage: "concurrent access", + }, + } + + When("no other tasks are running", func() { + BeforeEach(func() { + ctx = pkgctx.WithVMRecentTasks(ctx, []vimtypes.TaskInfo{ + transientTaskInfo, + }) + }) + It("should issue a new promote disks task", func() { + Expect(err).To(MatchError(diskpromo.ErrPromoteDisks)) + c := conditions.Get(vm, vmopv1.VirtualMachineDiskPromotionSynced) + Expect(c).ToNot(BeNil()) + Expect(c.Status).To(Equal(metav1.ConditionFalse)) + Expect(c.Reason).To(Equal(diskpromo.ReasonRunning)) + }) + }) + + When("a competing task is still running", func() { + BeforeEach(func() { + ctx = pkgctx.WithVMRecentTasks(ctx, []vimtypes.TaskInfo{ + transientTaskInfo, + { + State: vimtypes.TaskInfoStateRunning, + DescriptionId: "fake.concurrent.task", + }, + }) + }) + It("should wait and mark pending", func() { + Expect(err).ToNot(HaveOccurred()) + c := conditions.Get(vm, vmopv1.VirtualMachineDiskPromotionSynced) + Expect(c).ToNot(BeNil()) + Expect(c.Status).To(Equal(metav1.ConditionFalse)) + Expect(c.Reason).To(Equal(diskpromo.ReasonPending)) + }) + }) + + When("a promote disks task is already running", func() { BeforeEach(func() { ctx = pkgctx.WithVMRecentTasks(ctx, []vimtypes.TaskInfo{ + transientTaskInfo, { State: vimtypes.TaskInfoStateRunning, DescriptionId: diskpromo.PromoteDisksTaskKey, }, }) }) - It("should mark the condition as running", func() { + It("should wait and mark running", func() { Expect(err).ToNot(HaveOccurred()) c := conditions.Get(vm, vmopv1.VirtualMachineDiskPromotionSynced) Expect(c).ToNot(BeNil()) Expect(c.Status).To(Equal(metav1.ConditionFalse)) Expect(c.Reason).To(Equal(diskpromo.ReasonRunning)) - Expect(c.Message).To(Equal("Promotion is running")) }) }) - When("VM has no child disks and no existing condition", func() { + When("multiple transient errors are present", func() { + BeforeEach(func() { + ctx = pkgctx.WithVMRecentTasks(ctx, []vimtypes.TaskInfo{ + transientTaskInfo, + transientTaskInfo, + }) + }) + It("should issue a new promote disks task", func() { + Expect(err).To(MatchError(diskpromo.ErrPromoteDisks)) + c := conditions.Get(vm, vmopv1.VirtualMachineDiskPromotionSynced) + Expect(c).ToNot(BeNil()) + Expect(c.Status).To(Equal(metav1.ConditionFalse)) + Expect(c.Reason).To(Equal(diskpromo.ReasonRunning)) + }) + }) + }) + + When("VM has no child disks and no existing condition", func() { BeforeEach(func() { // Remove all child disks moVM.Config.Hardware.Device = nil diff --git a/test/e2e/go.mod b/test/e2e/go.mod index 08a03cfc6..77a4ef0f2 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -66,7 +66,7 @@ require ( github.com/vmware-tanzu/vm-operator/external/tanzu-topology v0.0.0-00010101000000-000000000000 github.com/vmware-tanzu/vm-operator/external/vsphere-csi-driver v0.0.0-00010101000000-000000000000 github.com/vmware-tanzu/vm-operator/pkg/backup/api v0.0.0-00010101000000-000000000000 - github.com/vmware/govmomi v0.54.0 + github.com/vmware/govmomi v0.55.0-alpha.0.0.20260518191903-48ab34adb211 golang.org/x/crypto v0.50.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 diff --git a/test/e2e/go.sum b/test/e2e/go.sum index 2186df478..1fdccfa6d 100644 --- a/test/e2e/go.sum +++ b/test/e2e/go.sum @@ -218,8 +218,8 @@ github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/vmware-tanzu/net-operator-api v0.0.0-20260501221253-4950cf50cd6b h1:Cjg5Iy0Lusv81iISuxHZuikSfYSy1SeePu7iqH214BY= github.com/vmware-tanzu/net-operator-api v0.0.0-20260501221253-4950cf50cd6b/go.mod h1:w6QJGm3crIA16ZIz1FVQXD2NVeJhOgGXxW05RbVTSTo= -github.com/vmware/govmomi v0.54.0 h1:akEKkM9XKMOhTskmdzTLG8JzH+sh61jbFrVPbAzv5IQ= -github.com/vmware/govmomi v0.54.0/go.mod h1:0F3hChqXDrSQQnjfSiCqRE5lPD4aZlbOtKG4uroq2a4= +github.com/vmware/govmomi v0.55.0-alpha.0.0.20260518191903-48ab34adb211 h1:n8hoHi/26x5GaTKTS04PqC7bNrCh7Wa7Eh44RKTM214= +github.com/vmware/govmomi v0.55.0-alpha.0.0.20260518191903-48ab34adb211/go.mod h1:0F3hChqXDrSQQnjfSiCqRE5lPD4aZlbOtKG4uroq2a4= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=