@@ -19,7 +19,10 @@ package util
1919import (
2020 "context"
2121 "fmt"
22+ "io"
2223 "regexp"
24+ "slices"
25+ "strings"
2326 "time"
2427
2528 . "github.com/onsi/ginkgo/v2"
@@ -28,13 +31,15 @@ import (
2831 k8serrors "k8s.io/apimachinery/pkg/api/errors"
2932 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3033 "k8s.io/apimachinery/pkg/types"
34+ "k8s.io/apimachinery/pkg/util/wait"
3135 virtv1 "kubevirt.io/api/core/v1"
3236 "sigs.k8s.io/controller-runtime/pkg/client"
3337
3438 vmopbuilder "github.com/deckhouse/virtualization-controller/pkg/builder/vmop"
3539 "github.com/deckhouse/virtualization-controller/pkg/controller/conditions"
3640 "github.com/deckhouse/virtualization/api/core/v1alpha2"
3741 "github.com/deckhouse/virtualization/api/core/v1alpha2/vmcondition"
42+ "github.com/deckhouse/virtualization/test/e2e/controller"
3843 "github.com/deckhouse/virtualization/test/e2e/internal/framework"
3944 "github.com/deckhouse/virtualization/test/e2e/internal/rewrite"
4045)
@@ -46,6 +51,18 @@ const (
4651
4752var knownKubeVirtClientSocketClosedRe = regexp .MustCompile (`(?is)virError\(Code=1,.*internal error:\s*client\s+socket\s+is\s+closed` )
4853
54+ var knownVDMigrationControllerRevertMessages = []string {
55+ "VirtualMachine is not running. Will be reverted." ,
56+ "VirtualMachine is not migrating. Will be reverted." ,
57+ "Target PersistentVolumeClaim is not found. Revert old PersistentVolumeClaim and remove migration condition." ,
58+ "Target PersistentVolumeClaim is not bound. Revert old PersistentVolumeClaim and remove migration condition." ,
59+ }
60+
61+ type controllerLogMatch struct {
62+ PodName string
63+ Line string
64+ }
65+
4966func IsKnownKubeVirtClientSocketClosedFailureReason (reason string ) bool {
5067 return knownKubeVirtClientSocketClosedRe .MatchString (reason )
5168}
@@ -106,6 +123,188 @@ func SkipIfKnownMigrationFailure(vm *v1alpha2.VirtualMachine) {
106123 SkipIfKnownVolumesUpdateMigrationFailure (vm )
107124}
108125
126+ func WaitUntilConditionOrSkipKnownVDMigrationControllerRevert (timeout time.Duration , namespace string , condition func () error ) {
127+ GinkgoHelper ()
128+
129+ waitStartedAt := time .Now ()
130+ var lastErr error
131+
132+ ctx , cancel := context .WithTimeout (context .Background (), timeout )
133+ defer cancel ()
134+
135+ err := wait .PollUntilContextTimeout (ctx , time .Second , timeout , true , func (context.Context ) (bool , error ) {
136+ lastErr = condition ()
137+ return lastErr == nil , nil
138+ })
139+ if err == nil {
140+ return
141+ }
142+
143+ if ctx .Err () == context .DeadlineExceeded {
144+ SkipIfKnownVDMigrationControllerRevertOnTimeout (namespace , waitStartedAt )
145+ }
146+
147+ if lastErr != nil {
148+ Fail (fmt .Sprintf ("timed out waiting for condition: %v" , lastErr ))
149+ }
150+
151+ Expect (err ).NotTo (HaveOccurred ())
152+ }
153+
154+ func SkipIfKnownVDMigrationControllerRevertOnTimeout (namespace string , since time.Time ) {
155+ GinkgoHelper ()
156+
157+ match , err := findKnownVDMigrationControllerRevertLog (namespace , since )
158+ if err != nil {
159+ GinkgoWriter .Printf ("Failed to inspect virtualization-controller logs for namespace %q: %v\n " , namespace , err )
160+ return
161+ }
162+ if match == nil {
163+ return
164+ }
165+
166+ Skip (fmt .Sprintf (
167+ "skip due to known virtualization-controller volume migration revert for namespace %s in pod %s: %s" ,
168+ namespace , match .PodName , match .Line ,
169+ ))
170+ }
171+
172+ func findKnownVDMigrationControllerRevertLog (namespace string , since time.Time ) (* controllerLogMatch , error ) {
173+ ctx , cancel := context .WithTimeout (context .Background (), framework .ShortTimeout )
174+ defer cancel ()
175+
176+ pods , err := framework .GetClients ().KubeClient ().CoreV1 ().Pods (controller .VirtualizationNamespace ).List (ctx , metav1.ListOptions {
177+ LabelSelector : fmt .Sprintf ("app=%s" , controller .VirtualizationController ),
178+ })
179+ if err != nil {
180+ return nil , fmt .Errorf ("list virtualization-controller pods: %w" , err )
181+ }
182+ orderedPods , err := orderVirtualizationControllerPodsByLeader (ctx , pods .Items )
183+ if err != nil {
184+ GinkgoWriter .Printf ("Failed to resolve virtualization-controller leader pod, fallback to all pods: %v\n " , err )
185+ orderedPods = pods .Items
186+ }
187+
188+ sinceTime := metav1 .NewTime (since .Add (- 5 * time .Second ))
189+ for _ , pod := range orderedPods {
190+ stream , err := framework .GetClients ().KubeClient ().CoreV1 ().Pods (pod .Namespace ).GetLogs (pod .Name , & corev1.PodLogOptions {
191+ Container : controller .VirtualizationController ,
192+ SinceTime : & sinceTime ,
193+ }).Stream (ctx )
194+ if err != nil {
195+ GinkgoWriter .Printf ("Failed to read virtualization-controller logs from pod %s: %v\n " , pod .Name , err )
196+ continue
197+ }
198+
199+ logs , readErr := io .ReadAll (stream )
200+ closeErr := stream .Close ()
201+ if readErr != nil {
202+ return nil , fmt .Errorf ("read virtualization-controller logs from pod %s: %w" , pod .Name , readErr )
203+ }
204+ if closeErr != nil {
205+ GinkgoWriter .Printf ("Failed to close virtualization-controller log stream for pod %s: %v\n " , pod .Name , closeErr )
206+ }
207+
208+ if line := findKnownVDMigrationControllerRevertLine (string (logs ), namespace ); line != "" {
209+ return & controllerLogMatch {
210+ PodName : pod .Name ,
211+ Line : line ,
212+ }, nil
213+ }
214+ }
215+
216+ return nil , nil
217+ }
218+
219+ func orderVirtualizationControllerPodsByLeader (ctx context.Context , pods []corev1.Pod ) ([]corev1.Pod , error ) {
220+ if len (pods ) <= 1 {
221+ return pods , nil
222+ }
223+ if ! isVirtualizationControllerLeaderElectionEnabled (pods ) {
224+ return pods , nil
225+ }
226+
227+ lease , err := framework .GetClients ().KubeClient ().CoordinationV1 ().Leases (controller .VirtualizationNamespace ).Get (ctx , controller .LeaderElectionID , metav1.GetOptions {})
228+ if err != nil {
229+ if k8serrors .IsNotFound (err ) {
230+ return pods , nil
231+ }
232+ return nil , fmt .Errorf ("get leader election lease %q: %w" , controller .LeaderElectionID , err )
233+ }
234+ if lease .Spec .HolderIdentity == nil || * lease .Spec .HolderIdentity == "" {
235+ return pods , nil
236+ }
237+
238+ holderIdentity := * lease .Spec .HolderIdentity
239+ leaderIdx := slices .IndexFunc (pods , func (pod corev1.Pod ) bool {
240+ return pod .Name == holderIdentity || strings .HasPrefix (holderIdentity , pod .Name + "_" )
241+ })
242+ if leaderIdx == - 1 {
243+ GinkgoWriter .Printf ("Virtualization-controller leader lease holder %q does not match listed pods; fallback to all pods\n " , holderIdentity )
244+ return pods , nil
245+ }
246+
247+ orderedPods := make ([]corev1.Pod , 0 , len (pods ))
248+ orderedPods = append (orderedPods , pods [leaderIdx ])
249+ for i , pod := range pods {
250+ if i == leaderIdx {
251+ continue
252+ }
253+ orderedPods = append (orderedPods , pod )
254+ }
255+
256+ return orderedPods , nil
257+ }
258+
259+ func isVirtualizationControllerLeaderElectionEnabled (pods []corev1.Pod ) bool {
260+ for _ , pod := range pods {
261+ for _ , container := range pod .Spec .Containers {
262+ if container .Name != controller .VirtualizationController {
263+ continue
264+ }
265+ return isLeaderElectionEnabledByArgs (container .Args )
266+ }
267+ }
268+
269+ // The controller uses a default value of true when the flag is not passed.
270+ return true
271+ }
272+
273+ func isLeaderElectionEnabledByArgs (args []string ) bool {
274+ enabled := true
275+
276+ for i , arg := range args {
277+ switch {
278+ case arg == "--leader-election" && i + 1 < len (args ) && ! strings .HasPrefix (args [i + 1 ], "--" ):
279+ enabled = args [i + 1 ] != "false"
280+ case arg == "--leader-election" :
281+ enabled = true
282+ case arg == "--leader-election=true" :
283+ enabled = true
284+ case arg == "--leader-election=false" :
285+ enabled = false
286+ case strings .HasPrefix (arg , "--leader-election=" ):
287+ enabled = strings .TrimPrefix (arg , "--leader-election=" ) != "false"
288+ }
289+ }
290+
291+ return enabled
292+ }
293+
294+ func findKnownVDMigrationControllerRevertLine (logs , namespace string ) string {
295+ for _ , line := range strings .Split (logs , "\n " ) {
296+ if ! strings .Contains (line , namespace ) {
297+ continue
298+ }
299+ for _ , message := range knownVDMigrationControllerRevertMessages {
300+ if strings .Contains (line , message ) {
301+ return strings .TrimSpace (line )
302+ }
303+ }
304+ }
305+ return ""
306+ }
307+
109308func getInternalVirtualMachineInstance (vm * v1alpha2.VirtualMachine ) (* virtv1.VirtualMachineInstance , error ) {
110309 GinkgoHelper ()
111310
@@ -152,7 +351,7 @@ func UntilSSHReady(f *framework.Framework, vm *v1alpha2.VirtualMachine, timeout
152351func UntilVMMigrationSucceeded (key client.ObjectKey , timeout time.Duration ) {
153352 GinkgoHelper ()
154353
155- Eventually ( func () error {
354+ WaitUntilConditionOrSkipKnownVDMigrationControllerRevert ( timeout , key . Namespace , func () error {
156355 vm , err := framework .GetClients ().VirtClient ().VirtualMachines (key .Namespace ).Get (context .Background (), key .Name , metav1.GetOptions {})
157356 if err != nil {
158357 return err
@@ -177,7 +376,7 @@ func UntilVMMigrationSucceeded(key client.ObjectKey, timeout time.Duration) {
177376 }
178377
179378 return nil
180- }). WithTimeout ( timeout ). WithPolling ( time . Second ). Should ( Succeed ())
379+ })
181380}
182381
183382func MigrateVirtualMachine (f * framework.Framework , vm * v1alpha2.VirtualMachine , options ... vmopbuilder.Option ) {
0 commit comments