Skip to content

Commit 4d0276f

Browse files
test: skip StorageClassMigration if msg 'Will be reverted'
Signed-off-by: Nikita Korolev <nikita.korolev@flant.com>
1 parent cd2b64a commit 4d0276f

4 files changed

Lines changed: 244 additions & 15 deletions

File tree

test/e2e/controller/controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
// Well-known identifiers of the virtualization-controller installation that the
// e2e helpers use to locate its pods and its leader-election lease.
const (
	// VirtualizationController is the controller's name.
	VirtualizationController = "virtualization-controller"

	// VirtualizationNamespace is the namespace the virtualization module runs in.
	VirtualizationNamespace = "d8-virtualization"

	// LeaderElectionID is the identifier of the controller's leader-election lease.
	LeaderElectionID = "d8-virt-operator-leader-election-helper"
)
2829

2930
var (

test/e2e/internal/util/vm.go

Lines changed: 201 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ package util
1919
import (
2020
"context"
2121
"fmt"
22+
"io"
2223
"regexp"
24+
"slices"
25+
"strings"
2326
"time"
2427

2528
. "github.com/onsi/ginkgo/v2"
@@ -28,13 +31,15 @@ import (
2831
k8serrors "k8s.io/apimachinery/pkg/api/errors"
2932
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3033
"k8s.io/apimachinery/pkg/types"
34+
"k8s.io/apimachinery/pkg/util/wait"
3135
virtv1 "kubevirt.io/api/core/v1"
3236
"sigs.k8s.io/controller-runtime/pkg/client"
3337

3438
vmopbuilder "github.com/deckhouse/virtualization-controller/pkg/builder/vmop"
3539
"github.com/deckhouse/virtualization-controller/pkg/controller/conditions"
3640
"github.com/deckhouse/virtualization/api/core/v1alpha2"
3741
"github.com/deckhouse/virtualization/api/core/v1alpha2/vmcondition"
42+
"github.com/deckhouse/virtualization/test/e2e/controller"
3843
"github.com/deckhouse/virtualization/test/e2e/internal/framework"
3944
"github.com/deckhouse/virtualization/test/e2e/internal/rewrite"
4045
)
@@ -46,6 +51,18 @@ const (
4651

4752
// knownKubeVirtClientSocketClosedRe matches failure reasons that contain
// "virError(Code=1," followed, anywhere later in the text, by
// "internal error: client socket is closed". The (?is) flags make the match
// case-insensitive and let "." span newlines.
var knownKubeVirtClientSocketClosedRe = regexp.MustCompile(`(?is)virError\(Code=1,.*internal error:\s*client\s+socket\s+is\s+closed`)
4853

54+
// knownVDMigrationControllerRevertMessages lists the virtualization-controller
// log messages that are treated as a known volume-migration revert. A log line
// containing any of them (together with the test namespace) makes the waiting
// test skip instead of fail.
var knownVDMigrationControllerRevertMessages = []string{
	// The VirtualMachine state no longer allows the migration to proceed.
	"VirtualMachine is not running. Will be reverted.",
	"VirtualMachine is not migrating. Will be reverted.",

	// The target PersistentVolumeClaim is missing or unusable.
	"Target PersistentVolumeClaim is not found. Revert old PersistentVolumeClaim and remove migration condition.",
	"Target PersistentVolumeClaim is not bound. Revert old PersistentVolumeClaim and remove migration condition.",
}
60+
61+
// controllerLogMatch describes a virtualization-controller log line that
// matched one of the known volume-migration revert messages.
type controllerLogMatch struct {
	// PodName is the name of the controller pod whose logs contained the line.
	PodName string
	// Line is the matching log line with surrounding whitespace trimmed.
	Line string
}
65+
4966
func IsKnownKubeVirtClientSocketClosedFailureReason(reason string) bool {
5067
return knownKubeVirtClientSocketClosedRe.MatchString(reason)
5168
}
@@ -106,6 +123,188 @@ func SkipIfKnownMigrationFailure(vm *v1alpha2.VirtualMachine) {
106123
SkipIfKnownVolumesUpdateMigrationFailure(vm)
107124
}
108125

126+
func WaitUntilConditionOrSkipKnownVDMigrationControllerRevert(timeout time.Duration, namespace string, condition func() error) {
127+
GinkgoHelper()
128+
129+
waitStartedAt := time.Now()
130+
var lastErr error
131+
132+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
133+
defer cancel()
134+
135+
err := wait.PollUntilContextTimeout(ctx, time.Second, timeout, true, func(context.Context) (bool, error) {
136+
lastErr = condition()
137+
return lastErr == nil, nil
138+
})
139+
if err == nil {
140+
return
141+
}
142+
143+
if ctx.Err() == context.DeadlineExceeded {
144+
SkipIfKnownVDMigrationControllerRevertOnTimeout(namespace, waitStartedAt)
145+
}
146+
147+
if lastErr != nil {
148+
Fail(fmt.Sprintf("timed out waiting for condition: %v", lastErr))
149+
}
150+
151+
Expect(err).NotTo(HaveOccurred())
152+
}
153+
154+
func SkipIfKnownVDMigrationControllerRevertOnTimeout(namespace string, since time.Time) {
155+
GinkgoHelper()
156+
157+
match, err := findKnownVDMigrationControllerRevertLog(namespace, since)
158+
if err != nil {
159+
GinkgoWriter.Printf("Failed to inspect virtualization-controller logs for namespace %q: %v\n", namespace, err)
160+
return
161+
}
162+
if match == nil {
163+
return
164+
}
165+
166+
Skip(fmt.Sprintf(
167+
"skip due to known virtualization-controller volume migration revert for namespace %s in pod %s: %s",
168+
namespace, match.PodName, match.Line,
169+
))
170+
}
171+
172+
func findKnownVDMigrationControllerRevertLog(namespace string, since time.Time) (*controllerLogMatch, error) {
173+
ctx, cancel := context.WithTimeout(context.Background(), framework.ShortTimeout)
174+
defer cancel()
175+
176+
pods, err := framework.GetClients().KubeClient().CoreV1().Pods(controller.VirtualizationNamespace).List(ctx, metav1.ListOptions{
177+
LabelSelector: fmt.Sprintf("app=%s", controller.VirtualizationController),
178+
})
179+
if err != nil {
180+
return nil, fmt.Errorf("list virtualization-controller pods: %w", err)
181+
}
182+
orderedPods, err := orderVirtualizationControllerPodsByLeader(ctx, pods.Items)
183+
if err != nil {
184+
GinkgoWriter.Printf("Failed to resolve virtualization-controller leader pod, fallback to all pods: %v\n", err)
185+
orderedPods = pods.Items
186+
}
187+
188+
sinceTime := metav1.NewTime(since.Add(-5 * time.Second))
189+
for _, pod := range orderedPods {
190+
stream, err := framework.GetClients().KubeClient().CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{
191+
Container: controller.VirtualizationController,
192+
SinceTime: &sinceTime,
193+
}).Stream(ctx)
194+
if err != nil {
195+
GinkgoWriter.Printf("Failed to read virtualization-controller logs from pod %s: %v\n", pod.Name, err)
196+
continue
197+
}
198+
199+
logs, readErr := io.ReadAll(stream)
200+
closeErr := stream.Close()
201+
if readErr != nil {
202+
return nil, fmt.Errorf("read virtualization-controller logs from pod %s: %w", pod.Name, readErr)
203+
}
204+
if closeErr != nil {
205+
GinkgoWriter.Printf("Failed to close virtualization-controller log stream for pod %s: %v\n", pod.Name, closeErr)
206+
}
207+
208+
if line := findKnownVDMigrationControllerRevertLine(string(logs), namespace); line != "" {
209+
return &controllerLogMatch{
210+
PodName: pod.Name,
211+
Line: line,
212+
}, nil
213+
}
214+
}
215+
216+
return nil, nil
217+
}
218+
219+
func orderVirtualizationControllerPodsByLeader(ctx context.Context, pods []corev1.Pod) ([]corev1.Pod, error) {
220+
if len(pods) <= 1 {
221+
return pods, nil
222+
}
223+
if !isVirtualizationControllerLeaderElectionEnabled(pods) {
224+
return pods, nil
225+
}
226+
227+
lease, err := framework.GetClients().KubeClient().CoordinationV1().Leases(controller.VirtualizationNamespace).Get(ctx, controller.LeaderElectionID, metav1.GetOptions{})
228+
if err != nil {
229+
if k8serrors.IsNotFound(err) {
230+
return pods, nil
231+
}
232+
return nil, fmt.Errorf("get leader election lease %q: %w", controller.LeaderElectionID, err)
233+
}
234+
if lease.Spec.HolderIdentity == nil || *lease.Spec.HolderIdentity == "" {
235+
return pods, nil
236+
}
237+
238+
holderIdentity := *lease.Spec.HolderIdentity
239+
leaderIdx := slices.IndexFunc(pods, func(pod corev1.Pod) bool {
240+
return pod.Name == holderIdentity || strings.HasPrefix(holderIdentity, pod.Name+"_")
241+
})
242+
if leaderIdx == -1 {
243+
GinkgoWriter.Printf("Virtualization-controller leader lease holder %q does not match listed pods; fallback to all pods\n", holderIdentity)
244+
return pods, nil
245+
}
246+
247+
orderedPods := make([]corev1.Pod, 0, len(pods))
248+
orderedPods = append(orderedPods, pods[leaderIdx])
249+
for i, pod := range pods {
250+
if i == leaderIdx {
251+
continue
252+
}
253+
orderedPods = append(orderedPods, pod)
254+
}
255+
256+
return orderedPods, nil
257+
}
258+
259+
func isVirtualizationControllerLeaderElectionEnabled(pods []corev1.Pod) bool {
260+
for _, pod := range pods {
261+
for _, container := range pod.Spec.Containers {
262+
if container.Name != controller.VirtualizationController {
263+
continue
264+
}
265+
return isLeaderElectionEnabledByArgs(container.Args)
266+
}
267+
}
268+
269+
// The controller uses a default value of true when the flag is not passed.
270+
return true
271+
}
272+
273+
// isLeaderElectionEnabledByArgs derives the effective value of the
// --leader-election flag from a container argument list.
//
// Supported forms are "--leader-election" (enabled), "--leader-election=VALUE"
// and "--leader-election VALUE" (the next argument is taken as the value only
// when it does not itself start with "--"). If the flag occurs several times
// the last occurrence wins. When the flag is absent the result is true.
//
// A value disables leader election when it is one of the false spellings
// accepted by strconv.ParseBool ("0", "f", "F", "false", "FALSE", "False");
// any other value, including an empty one, keeps it enabled.
func isLeaderElectionEnabledByArgs(args []string) bool {
	const flagName = "--leader-election"

	enabled := true
	for i, arg := range args {
		// Splitting on the first "=" keeps longer flags that merely share the
		// prefix (e.g. "--leader-election-id=...") from matching.
		name, value, hasValue := strings.Cut(arg, "=")
		if name != flagName {
			continue
		}

		switch {
		case hasValue:
			enabled = !isFalseBoolFlagValue(value)
		case i+1 < len(args) && !strings.HasPrefix(args[i+1], "--"):
			enabled = !isFalseBoolFlagValue(args[i+1])
		default:
			enabled = true
		}
	}

	return enabled
}

// isFalseBoolFlagValue reports whether value is one of the spellings that
// strconv.ParseBool parses as false.
func isFalseBoolFlagValue(value string) bool {
	switch value {
	case "0", "f", "F", "false", "FALSE", "False":
		return true
	default:
		return false
	}
}
293+
294+
func findKnownVDMigrationControllerRevertLine(logs, namespace string) string {
295+
for _, line := range strings.Split(logs, "\n") {
296+
if !strings.Contains(line, namespace) {
297+
continue
298+
}
299+
for _, message := range knownVDMigrationControllerRevertMessages {
300+
if strings.Contains(line, message) {
301+
return strings.TrimSpace(line)
302+
}
303+
}
304+
}
305+
return ""
306+
}
307+
109308
func getInternalVirtualMachineInstance(vm *v1alpha2.VirtualMachine) (*virtv1.VirtualMachineInstance, error) {
110309
GinkgoHelper()
111310

@@ -152,7 +351,7 @@ func UntilSSHReady(f *framework.Framework, vm *v1alpha2.VirtualMachine, timeout
152351
func UntilVMMigrationSucceeded(key client.ObjectKey, timeout time.Duration) {
153352
GinkgoHelper()
154353

155-
Eventually(func() error {
354+
WaitUntilConditionOrSkipKnownVDMigrationControllerRevert(timeout, key.Namespace, func() error {
156355
vm, err := framework.GetClients().VirtClient().VirtualMachines(key.Namespace).Get(context.Background(), key.Name, metav1.GetOptions{})
157356
if err != nil {
158357
return err
@@ -177,7 +376,7 @@ func UntilVMMigrationSucceeded(key client.ObjectKey, timeout time.Duration) {
177376
}
178377

179378
return nil
180-
}).WithTimeout(timeout).WithPolling(time.Second).Should(Succeed())
379+
})
181380
}
182381

183382
func MigrateVirtualMachine(f *framework.Framework, vm *v1alpha2.VirtualMachine, options ...vmopbuilder.Option) {

test/e2e/vm/util.go

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -129,33 +129,60 @@ func untilVirtualDisksMigrationsSucceeded(f *framework.Framework) {
129129
GinkgoHelper()
130130

131131
By("Wait until VirtualDisks migrations succeeded")
132-
Eventually(func(g Gomega) {
132+
e2eutil.WaitUntilConditionOrSkipKnownVDMigrationControllerRevert(framework.MaxTimeout, f.Namespace().Name, func() error {
133133
vms, err := f.VirtClient().VirtualMachines(f.Namespace().Name).List(context.Background(), metav1.ListOptions{})
134-
g.Expect(err).NotTo(HaveOccurred())
134+
if err != nil {
135+
return err
136+
}
135137
for _, vm := range vms.Items {
136138
// TODO: remove temporary migration skip logic when both known issues are fixed:
137139
// kubevirt "client socket is closed" and Volume(s)UpdateError.
138140
e2eutil.SkipIfKnownMigrationFailure(&vm)
139141
}
140142

141143
vds, err := f.VirtClient().VirtualDisks(f.Namespace().Name).List(context.Background(), metav1.ListOptions{})
142-
g.Expect(err).NotTo(HaveOccurred())
144+
if err != nil {
145+
return err
146+
}
143147

144-
g.Expect(vds.Items).ShouldNot(BeEmpty())
148+
if len(vds.Items) == 0 {
149+
return fmt.Errorf("virtual disk list is empty")
150+
}
145151
for _, vd := range vds.Items {
146-
g.Expect(vd.Status.Phase).To(Equal(v1alpha2.DiskReady))
147-
g.Expect(vd.Status.Target.PersistentVolumeClaim).ShouldNot(BeEmpty())
152+
if vd.Status.Phase != v1alpha2.DiskReady {
153+
return fmt.Errorf("vd %s phase is %s, expected %s", vd.Name, vd.Status.Phase, v1alpha2.DiskReady)
154+
}
155+
if vd.Status.Target.PersistentVolumeClaim == "" {
156+
return fmt.Errorf("vd %s target pvc is empty", vd.Name)
157+
}
148158

149159
if vd.Status.MigrationState.StartTimestamp.IsZero() {
150160
// Skip the disks that are not migrated
151161
continue
152162
}
153163

154-
g.Expect(vd.Status.MigrationState.EndTimestamp.IsZero()).Should(BeFalse(), "migration is not ended for vd %s", vd.Name)
155-
g.Expect(vd.Status.Target.PersistentVolumeClaim).To(Equal(vd.Status.MigrationState.TargetPVC))
156-
g.Expect(vd.Status.MigrationState.Result).To(Equal(v1alpha2.VirtualDiskMigrationResultSucceeded))
164+
if vd.Status.MigrationState.EndTimestamp.IsZero() {
165+
return fmt.Errorf("migration is not ended for vd %s", vd.Name)
166+
}
167+
if vd.Status.Target.PersistentVolumeClaim != vd.Status.MigrationState.TargetPVC {
168+
return fmt.Errorf(
169+
"vd %s target pvc mismatch: current=%s migration-target=%s",
170+
vd.Name,
171+
vd.Status.Target.PersistentVolumeClaim,
172+
vd.Status.MigrationState.TargetPVC,
173+
)
174+
}
175+
if vd.Status.MigrationState.Result != v1alpha2.VirtualDiskMigrationResultSucceeded {
176+
return fmt.Errorf(
177+
"vd %s migration result is %s, expected %s",
178+
vd.Name,
179+
vd.Status.MigrationState.Result,
180+
v1alpha2.VirtualDiskMigrationResultSucceeded,
181+
)
182+
}
157183
}
158-
}).WithTimeout(framework.MaxTimeout).WithPolling(time.Second).Should(Succeed())
184+
return nil
185+
})
159186
}
160187

161188
func untilVirtualDisksMigrationsFailed(f *framework.Framework) {

test/e2e/vm/volume_migration_storage_class_changed.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ var _ = Describe("StorageClassMigration", decoratorsForVolumeMigrations(), func(
236236
err = patchStorageClassName(context.Background(), f, sc, vdForMigration)
237237
Expect(err).NotTo(HaveOccurred())
238238

239-
Eventually(func() error {
239+
util.WaitUntilConditionOrSkipKnownVDMigrationControllerRevert(framework.MaxTimeout, ns, func() error {
240240
vm, err = f.VirtClient().VirtualMachines(ns).Get(context.Background(), vm.GetName(), metav1.GetOptions{})
241241
if err != nil {
242242
return err
@@ -247,7 +247,9 @@ var _ = Describe("StorageClassMigration", decoratorsForVolumeMigrations(), func(
247247

248248
var lastVMOP *v1alpha2.VirtualMachineOperation
249249
vmops, err := f.VirtClient().VirtualMachineOperations(ns).List(context.Background(), metav1.ListOptions{})
250-
Expect(err).NotTo(HaveOccurred())
250+
if err != nil {
251+
return err
252+
}
251253

252254
for _, vmop := range vmops.Items {
253255
if vmop.Spec.VirtualMachine == vm.Name {
@@ -271,7 +273,7 @@ var _ = Describe("StorageClassMigration", decoratorsForVolumeMigrations(), func(
271273
}
272274

273275
return fmt.Errorf("migration is not completed")
274-
}).WithTimeout(framework.MaxTimeout).WithPolling(time.Second).Should(Succeed())
276+
})
275277

276278
By("Wait until VM migration succeeded")
277279
util.UntilVMMigrationSucceeded(crclient.ObjectKeyFromObject(vm), framework.MaxTimeout)

0 commit comments

Comments
 (0)