Skip to content

Commit 3b80171

Browse files
committed
simplify WaitUntilNodeReady and add bastion creation timing
Inline single-use helpers (failNodeNotReady, isNodeReady, logNodeDetail) into WaitUntilNodeReady. Retry the node watch when it fails to start or disconnects, and fail the test once the context is cancelled. Add timing log for bastion creation.
1 parent a18595d commit 3b80171

3 files changed

Lines changed: 49 additions & 48 deletions

File tree

e2e/cluster.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,10 @@ func getOrCreateBastion(ctx context.Context, cluster *armcontainerservice.Manage
487487
}
488488

489489
func createNewBastion(ctx context.Context, cluster *armcontainerservice.ManagedCluster) (*Bastion, error) {
490+
now := time.Now()
491+
defer func() {
492+
logf(ctx, "bastion creation took %s", time.Since(now))
493+
}()
490494
nodeRG := *cluster.Properties.NodeResourceGroup
491495
location := *cluster.Location
492496
bastionName := fmt.Sprintf("%s-bastion", *cluster.Name)

e2e/kube.go

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
"github.com/Azure/agentbaker/e2e/config"
1313
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
14-
"github.com/stretchr/testify/require"
1514
appsv1 "k8s.io/api/apps/v1"
1615
corev1 "k8s.io/api/core/v1"
1716
v1 "k8s.io/api/core/v1"
@@ -145,58 +144,59 @@ func (k *Kubeclient) WaitUntilPodRunning(ctx context.Context, namespace string,
145144

146145
func (k *Kubeclient) WaitUntilNodeReady(ctx context.Context, t testing.TB, vmssName string) string {
147146
startTime := time.Now()
148-
t.Logf("waiting for node %s to be ready in k8s API", vmssName)
147+
t.Logf("waiting for node %s to be ready", vmssName)
149148
defer func() {
150-
t.Logf("waited for node %s to be ready in k8s API for %s", vmssName, time.Since(startTime))
149+
t.Logf("waited for node %s to be ready for %s", vmssName, time.Since(startTime))
151150
}()
152151

153-
var node *corev1.Node = nil
154-
watcher, err := k.Typed.CoreV1().Nodes().Watch(ctx, metav1.ListOptions{})
155-
require.NoError(t, err, "failed to start watching nodes")
156-
defer watcher.Stop()
157-
158-
for event := range watcher.ResultChan() {
159-
if event.Type != watch.Added && event.Type != watch.Modified {
160-
continue
161-
}
162-
163-
var nodeFromEvent *corev1.Node
164-
switch v := event.Object.(type) {
165-
case *corev1.Node:
166-
nodeFromEvent = v
167-
168-
default:
169-
t.Logf("skipping object type %T", event.Object)
170-
continue
171-
}
172-
173-
if !strings.HasPrefix(nodeFromEvent.Name, vmssName) {
174-
continue
175-
}
176-
177-
// found the right node. Use it!
178-
node = nodeFromEvent
179-
nodeTaints, _ := json.Marshal(node.Spec.Taints)
180-
nodeConditions, _ := json.Marshal(node.Status.Conditions)
152+
var lastNode *corev1.Node
153+
for ctx.Err() == nil {
154+
name := func() string {
155+
watcher, err := k.Typed.CoreV1().Nodes().Watch(ctx, metav1.ListOptions{})
156+
if err != nil {
157+
t.Logf("failed to start node watch: %v, retrying in 5s", err)
158+
select {
159+
case <-ctx.Done():
160+
case <-time.After(5 * time.Second):
161+
}
162+
return ""
163+
}
164+
defer watcher.Stop()
181165

182-
for _, cond := range node.Status.Conditions {
183-
if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
184-
t.Logf("node %s is ready. Taints: %s Conditions: %s", node.Name, string(nodeTaints), string(nodeConditions))
185-
return node.Name
166+
for event := range watcher.ResultChan() {
167+
if event.Type == watch.Error {
168+
t.Logf("node watch error: %v", event.Object)
169+
return ""
170+
}
171+
node, ok := event.Object.(*corev1.Node)
172+
if !ok || !strings.HasPrefix(node.Name, vmssName) {
173+
continue
174+
}
175+
if event.Type == watch.Deleted {
176+
t.Fatalf("node %s was deleted", node.Name)
177+
}
178+
lastNode = node
179+
for _, cond := range node.Status.Conditions {
180+
if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
181+
t.Logf("node %s is ready", node.Name)
182+
return node.Name
183+
}
184+
}
186185
}
186+
return ""
187+
}()
188+
if name != "" {
189+
return name
187190
}
188-
189-
t.Logf("node %s is not ready. Taints: %s Conditions: %s", node.Name, string(nodeTaints), string(nodeConditions))
191+
t.Logf("node watch disconnected, restarting")
190192
}
191193

192-
if node == nil {
193-
t.Fatalf("%q haven't appeared in k8s API server", vmssName)
194-
return ""
194+
if lastNode != nil {
195+
nodeJSON, _ := json.Marshal(lastNode)
196+
t.Fatalf("node %s (%s) not ready: %v\n%s", vmssName, lastNode.Name, ctx.Err(), nodeJSON)
195197
}
196-
197-
nodeString, _ := json.Marshal(node)
198-
t.Fatalf("failed to wait for %q (%s) to be ready %+v. Detail: %s", vmssName, node.Name, node.Status, string(nodeString))
199-
return node.Name
198+
t.Fatalf("node %q not found: %v", vmssName, ctx.Err())
199+
return ""
200200
}
201201

202202
// GetPodNetworkDebugPodForNode returns a pod that's a member of the 'debugnonhost' daemonset running in the cluster - this will return

e2e/test_helpers.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,9 @@ func prepareAKSNode(ctx context.Context, s *Scenario) (*ScenarioVM, error) {
280280
require.NoError(s.T, err)
281281

282282
if !s.Config.SkipDefaultValidation {
283-
vmssCreatedAt := time.Now() // Record the start time
284-
creationElapse := time.Since(start) // Calculate the elapsed time
285283
scenarioVM.KubeName = s.Runtime.Cluster.Kube.WaitUntilNodeReady(ctx, s.T, s.Runtime.VMSSName)
286-
readyElapse := time.Since(vmssCreatedAt) // Calculate the elapsed time
287284
totalElapse := time.Since(start)
288-
toolkit.LogDuration(ctx, totalElapse, 3*time.Minute, fmt.Sprintf("Node %s took %s to be created and %s to be ready", s.Runtime.VMSSName, creationElapse, readyElapse))
285+
toolkit.LogDuration(ctx, totalElapse, 3*time.Minute, fmt.Sprintf("Node %s took %s to be created and joined the cluster", s.Runtime.VMSSName, totalElapse))
289286
}
290287

291288
return scenarioVM, nil

0 commit comments

Comments
 (0)