@@ -11,7 +11,6 @@ import (
1111
1212 "github.com/Azure/agentbaker/e2e/config"
1313 "github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
14- "github.com/stretchr/testify/require"
1514 appsv1 "k8s.io/api/apps/v1"
1615 corev1 "k8s.io/api/core/v1"
1716 v1 "k8s.io/api/core/v1"
@@ -145,58 +144,59 @@ func (k *Kubeclient) WaitUntilPodRunning(ctx context.Context, namespace string,
145144
146145func (k * Kubeclient ) WaitUntilNodeReady (ctx context.Context , t testing.TB , vmssName string ) string {
147146 startTime := time .Now ()
148- t .Logf ("waiting for node %s to be ready in k8s API " , vmssName )
147+ t .Logf ("waiting for node %s to be ready" , vmssName )
149148 defer func () {
150- t .Logf ("waited for node %s to be ready in k8s API for %s" , vmssName , time .Since (startTime ))
149+ t .Logf ("waited for node %s to be ready for %s" , vmssName , time .Since (startTime ))
151150 }()
152151
153- var node * corev1.Node = nil
154- watcher , err := k .Typed .CoreV1 ().Nodes ().Watch (ctx , metav1.ListOptions {})
155- require .NoError (t , err , "failed to start watching nodes" )
156- defer watcher .Stop ()
157-
158- for event := range watcher .ResultChan () {
159- if event .Type != watch .Added && event .Type != watch .Modified {
160- continue
161- }
162-
163- var nodeFromEvent * corev1.Node
164- switch v := event .Object .(type ) {
165- case * corev1.Node :
166- nodeFromEvent = v
167-
168- default :
169- t .Logf ("skipping object type %T" , event .Object )
170- continue
171- }
172-
173- if ! strings .HasPrefix (nodeFromEvent .Name , vmssName ) {
174- continue
175- }
176-
177- // found the right node. Use it!
178- node = nodeFromEvent
179- nodeTaints , _ := json .Marshal (node .Spec .Taints )
180- nodeConditions , _ := json .Marshal (node .Status .Conditions )
152+ var lastNode * corev1.Node
153+ for ctx .Err () == nil {
154+ name := func () string {
155+ watcher , err := k .Typed .CoreV1 ().Nodes ().Watch (ctx , metav1.ListOptions {})
156+ if err != nil {
157+ t .Logf ("failed to start node watch: %v, retrying in 5s" , err )
158+ select {
159+ case <- ctx .Done ():
160+ case <- time .After (5 * time .Second ):
161+ }
162+ return ""
163+ }
164+ defer watcher .Stop ()
181165
182- for _ , cond := range node .Status .Conditions {
183- if cond .Type == corev1 .NodeReady && cond .Status == corev1 .ConditionTrue {
184- t .Logf ("node %s is ready. Taints: %s Conditions: %s" , node .Name , string (nodeTaints ), string (nodeConditions ))
185- return node .Name
166+ for event := range watcher .ResultChan () {
167+ if event .Type == watch .Error {
168+ t .Logf ("node watch error: %v" , event .Object )
169+ return ""
170+ }
171+ node , ok := event .Object .(* corev1.Node )
172+ if ! ok || ! strings .HasPrefix (node .Name , vmssName ) {
173+ continue
174+ }
175+ if event .Type == watch .Deleted {
176+ t .Fatalf ("node %s was deleted" , node .Name )
177+ }
178+ lastNode = node
179+ for _ , cond := range node .Status .Conditions {
180+ if cond .Type == corev1 .NodeReady && cond .Status == corev1 .ConditionTrue {
181+ t .Logf ("node %s is ready" , node .Name )
182+ return node .Name
183+ }
184+ }
186185 }
186+ return ""
187+ }()
188+ if name != "" {
189+ return name
187190 }
188-
189- t .Logf ("node %s is not ready. Taints: %s Conditions: %s" , node .Name , string (nodeTaints ), string (nodeConditions ))
191+ t .Logf ("node watch disconnected, restarting" )
190192 }
191193
192- if node = = nil {
193- t . Fatalf ( "%q haven't appeared in k8s API server" , vmssName )
194- return ""
194+ if lastNode ! = nil {
195+ nodeJSON , _ := json . Marshal ( lastNode )
196+ t . Fatalf ( "node %s (%s) not ready: %v \n %s" , vmssName , lastNode . Name , ctx . Err (), nodeJSON )
195197 }
196-
197- nodeString , _ := json .Marshal (node )
198- t .Fatalf ("failed to wait for %q (%s) to be ready %+v. Detail: %s" , vmssName , node .Name , node .Status , string (nodeString ))
199- return node .Name
198+ t .Fatalf ("node %q not found: %v" , vmssName , ctx .Err ())
199+ return ""
200200}
201201
202202// GetPodNetworkDebugPodForNode returns a pod that's a member of the 'debugnonhost' daemonset running in the cluster - this will return
0 commit comments