Skip to content

Commit 89441e6

Browse files
Kyle-Neale and claude authored
[ddev] Retry agent check on transient failure to fix SNMP E2E flake (DataDog#23646)
* Add bounded retry around agent check invocations in env/agent.py
* fix(ddev): add trailing period to changelog entry

Per AGENTS.md, changelog entries must end with a period.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 30c8360 commit 89441e6

2 files changed

Lines changed: 27 additions & 3 deletions

File tree

ddev/changelog.d/23646.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Retry agent check invocations on transient failures to address SNMP E2E flake from autodiscovery reload races.

ddev/src/ddev/cli/env/agent.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,26 @@
99

1010
if TYPE_CHECKING:
1111
from ddev.cli.application import Application
12+
from ddev.e2e.agent.interface import AgentInterface
13+
14+
15+
def _invoke_check_with_retry(agent: AgentInterface, args: list[str], *, retries: int = 3, backoff: float = 0.5) -> None:
    """Invoke ``agent check`` with bounded retry to absorb transient autodiscovery-reload races.

    Args:
        agent: Object whose ``invoke(args)`` method runs the agent command.
        args: Arguments forwarded unchanged to ``agent.invoke`` on every attempt.
        retries: Number of additional attempts allowed after the first failure.
            Negative values are treated as ``0`` so the command always runs at
            least once instead of being silently skipped.
        backoff: Seconds to wait between attempts. Negative values are treated
            as ``0`` so ``time.sleep`` cannot raise and mask the real failure.

    Raises:
        subprocess.CalledProcessError: Re-raised from the final attempt once
            every retry has been used. Only this exception type is retried;
            anything else raised by ``agent.invoke`` propagates immediately.
    """
    import subprocess
    import time

    # Clamp caller-supplied knobs: an empty attempt range would otherwise
    # return "success" without ever invoking the check.
    retries = max(retries, 0)
    backoff = max(backoff, 0.0)
    total_attempts = retries + 1

    for attempt in range(1, total_attempts + 1):
        try:
            agent.invoke(args)
        except subprocess.CalledProcessError:
            if attempt == total_attempts:
                # Out of retries: surface the original failure to the caller.
                raise
            click.echo(
                f'agent check failed (attempt {attempt}/{total_attempts}), retrying in {backoff:.1f}s...',
                err=True,
            )
            time.sleep(backoff)
        else:
            return
1232

1333

1434
@click.command(
@@ -54,7 +74,10 @@ def agent(app: Application, *, intg_name: str, environment: str, args: tuple[str
5474

5575
if config_file is None or not trigger_run:
5676
try:
57-
agent.invoke(full_args)
77+
if trigger_run:
78+
_invoke_check_with_retry(agent, full_args)
79+
else:
80+
agent.invoke(full_args)
5881
except subprocess.CalledProcessError as e:
5982
app.abort(code=e.returncode)
6083

@@ -67,14 +90,14 @@ def agent(app: Application, *, intg_name: str, environment: str, args: tuple[str
6790
if not env_data.config_file.is_file():
6891
try:
6992
env_data.write_config(config)
70-
agent.invoke(full_args)
93+
_invoke_check_with_retry(agent, full_args)
7194
finally:
7295
env_data.config_file.unlink()
7396
else:
7497
temp_config_file = env_data.config_file.parent / f'{env_data.config_file.name}.bak.example'
7598
env_data.config_file.replace(temp_config_file)
7699
try:
77100
env_data.write_config(config)
78-
agent.invoke(full_args)
101+
_invoke_check_with_retry(agent, full_args)
79102
finally:
80103
temp_config_file.replace(env_data.config_file)

0 commit comments

Comments
 (0)