Skip to content

Commit 354f6ec

Browse files
Always clean DDA before next test to avoid races (#2954) (#2955)
(cherry picked from commit 41dddf7) Co-authored-by: Timothée Bavelier <97530782+tbavelier@users.noreply.github.com>
1 parent d7d73cf · commit 354f6ec

1 file changed

Lines changed: 31 additions & 36 deletions

File tree

test/e2e/tests/k8s_suite/k8s_suite_test.go

Lines changed: 31 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,28 @@ serviceAccount:
8888
lastTestName = testName
8989
s.UpdateEnv(provisioners.KubernetesProvisioner(opts...))
9090
}
91+
// applyDDA tears down any in-stack DatadogAgent before installing the new
92+
// one. Used instead of updateEnv whenever a subtest applies a DDA.
93+
//
94+
// Why the explicit two-step swap: applying a new DDA on top of a previous
95+
// one does delete+create concurrently, which can leave the new agent
96+
// DaemonSet pod stuck on resources still owned by the previous DDA. The
97+
// most visible cases are:
98+
// - K8s <1.20: the legacy SA-token controller can't keep up with the SA
99+
// delete+create churn during the swap, so the new agent pod sits in
100+
// FailedMount on its auto-generated <sa>-token-<rand> Secret;
101+
// - host-port subtests (APM hostPort, DSD UDP): the previous agent pod
102+
// hasn't released the port yet when the new pod tries to bind.
103+
// Both manifest as "agent pod never reaches Running" on the new DDA.
104+
applyDDA := func(testName string, opts []provisioners.KubernetesProvisionerOption) {
105+
cleanupOpts := []provisioners.KubernetesProvisionerOption{
106+
provisioners.WithTestName(testName),
107+
provisioners.WithoutDDA(),
108+
}
109+
cleanupOpts = append(cleanupOpts, defaultProvisionerOpts...)
110+
updateEnv(testName, cleanupOpts)
111+
updateEnv(testName, opts)
112+
}
91113
t.Cleanup(func() {
92114
if lastTestName == "" {
93115
return
@@ -131,7 +153,7 @@ serviceAccount:
131153
provisioners.WithLocal(s.local),
132154
}
133155

134-
updateEnv("e2e-operator-minimal-dda", provisionerOptions)
156+
applyDDA("e2e-operator-minimal-dda", provisionerOptions)
135157

136158
err = s.Env().FakeIntake.Client().FlushServerAndResetAggregators()
137159
s.Assert().NoError(err)
@@ -197,7 +219,7 @@ serviceAccount:
197219
provisioners.WithLocal(s.local),
198220
}
199221

200-
updateEnv("e2e-operator-ksm-ccr", provisionerOptions)
222+
applyDDA("e2e-operator-ksm-ccr", provisionerOptions)
201223

202224
err = s.Env().FakeIntake.Client().FlushServerAndResetAggregators()
203225
s.Assert().NoError(err)
@@ -238,7 +260,7 @@ serviceAccount:
238260
provisionerOptions = append(provisionerOptions, defaultProvisionerOpts...)
239261

240262
// Add nginx with annotations
241-
updateEnv("e2e-operator-autodiscovery", provisionerOptions)
263+
applyDDA("e2e-operator-autodiscovery", provisionerOptions)
242264

243265
err = s.Env().FakeIntake.Client().FlushServerAndResetAggregators()
244266
s.Assert().NoError(err)
@@ -284,7 +306,7 @@ serviceAccount:
284306
provisioners.WithLocal(s.local),
285307
}
286308

287-
updateEnv("e2e-operator-logs-collection", provisionerOptions)
309+
applyDDA("e2e-operator-logs-collection", provisionerOptions)
288310

289311
err = s.Env().FakeIntake.Client().FlushServerAndResetAggregators()
290312
s.Assert().NoError(err)
@@ -307,17 +329,6 @@ serviceAccount:
307329
})
308330

309331
s.T().Run("APM hostPort k8s service UDP works", func(t *testing.T) {
310-
311-
// Cleanup to avoid potential lingering DatadogAgent
312-
// Avoid race with the new Agent not being able to bind to the hostPort
313-
withoutDDAProvisionerOptions := []provisioners.KubernetesProvisionerOption{
314-
provisioners.WithTestName("e2e-operator-apm"),
315-
provisioners.WithoutDDA(),
316-
provisioners.WithLocal(s.local),
317-
}
318-
withoutDDAProvisionerOptions = append(withoutDDAProvisionerOptions, defaultProvisionerOpts...)
319-
updateEnv("e2e-operator-apm", withoutDDAProvisionerOptions)
320-
321332
var apmAgentSelector = ",agent.datadoghq.com/name=datadog-agent-apm"
322333
ddaConfigPath, err := common.GetAbsPath(filepath.Join(common.ManifestsPath, "apm", "datadog-agent-apm.yaml"))
323334
assert.NoError(s.T(), err)
@@ -342,7 +353,7 @@ serviceAccount:
342353
ddaProvisionerOptions = append(ddaProvisionerOptions, defaultProvisionerOpts...)
343354

344355
// Deploy APM DatadogAgent and tracegen
345-
updateEnv("e2e-operator-apm", ddaProvisionerOptions)
356+
applyDDA("e2e-operator-apm", ddaProvisionerOptions)
346357

347358
// Verify traces collection on agent pod
348359
s.EventuallyWithTf(func(c *assert.CollectT) {
@@ -372,14 +383,6 @@ serviceAccount:
372383

373384
// --- Subtest: DSD UDP, ADP disabled ---
374385
s.T().Run("DSD UDP without ADP", func(t *testing.T) {
375-
// Deploy without DDA first to avoid host port binding races
376-
withoutDDAOpts := []provisioners.KubernetesProvisionerOption{
377-
provisioners.WithTestName("e2e-operator-dsd-udp"),
378-
provisioners.WithoutDDA(),
379-
}
380-
withoutDDAOpts = append(withoutDDAOpts, defaultProvisionerOpts...)
381-
updateEnv("e2e-operator-dsd-udp", withoutDDAOpts)
382-
383386
ddaConfigPath, err := common.GetAbsPath(filepath.Join(common.ManifestsPath, "dogstatsd", "datadog-agent-dsd-udp.yaml"))
384387
assert.NoError(s.T(), err)
385388

@@ -395,7 +398,7 @@ serviceAccount:
395398
provisioners.WithDDAOptions(ddaOpts...),
396399
}
397400
provisionerOpts = append(provisionerOpts, defaultProvisionerOpts...)
398-
updateEnv("e2e-operator-dsd-udp", provisionerOpts)
401+
applyDDA("e2e-operator-dsd-udp", provisionerOpts)
399402

400403
agentSelector := common.NodeAgentSelector + ",agent.datadoghq.com/name=dda-dsd-udp"
401404

@@ -415,14 +418,6 @@ serviceAccount:
415418

416419
// --- Subtest: DSD UDP, ADP enabled ---
417420
s.T().Run("DSD UDP with ADP", func(t *testing.T) {
418-
// Deploy without DDA first to avoid host port binding races
419-
withoutDDAOpts := []provisioners.KubernetesProvisionerOption{
420-
provisioners.WithTestName("e2e-operator-dsd-udp-adp"),
421-
provisioners.WithoutDDA(),
422-
}
423-
withoutDDAOpts = append(withoutDDAOpts, defaultProvisionerOpts...)
424-
updateEnv("e2e-operator-dsd-udp-adp", withoutDDAOpts)
425-
426421
ddaConfigPath, err := common.GetAbsPath(filepath.Join(common.ManifestsPath, "dogstatsd", "datadog-agent-dsd-udp-adp.yaml"))
427422
assert.NoError(s.T(), err)
428423

@@ -438,7 +433,7 @@ serviceAccount:
438433
provisioners.WithDDAOptions(ddaOpts...),
439434
}
440435
provisionerOpts = append(provisionerOpts, defaultProvisionerOpts...)
441-
updateEnv("e2e-operator-dsd-udp-adp", provisionerOpts)
436+
applyDDA("e2e-operator-dsd-udp-adp", provisionerOpts)
442437

443438
agentSelector := common.NodeAgentSelector + ",agent.datadoghq.com/name=dda-dsd-udp-adp"
444439

@@ -475,7 +470,7 @@ serviceAccount:
475470
provisioners.WithDDAOptions(ddaOpts...),
476471
}
477472
provisionerOpts = append(provisionerOpts, defaultProvisionerOpts...)
478-
updateEnv("e2e-operator-dsd-uds", provisionerOpts)
473+
applyDDA("e2e-operator-dsd-uds", provisionerOpts)
479474

480475
agentSelector := common.NodeAgentSelector + ",agent.datadoghq.com/name=dda-dsd-uds"
481476

@@ -511,7 +506,7 @@ serviceAccount:
511506
provisioners.WithDDAOptions(ddaOpts...),
512507
}
513508
provisionerOpts = append(provisionerOpts, defaultProvisionerOpts...)
514-
updateEnv("e2e-operator-dsd-uds-adp", provisionerOpts)
509+
applyDDA("e2e-operator-dsd-uds-adp", provisionerOpts)
515510

516511
agentSelector := common.NodeAgentSelector + ",agent.datadoghq.com/name=dda-dsd-uds-adp"
517512

0 commit comments

Comments
 (0)