diff --git a/ddev/changelog.d/23722.added b/ddev/changelog.d/23722.added new file mode 100644 index 0000000000000..4d0ae91f2b214 --- /dev/null +++ b/ddev/changelog.d/23722.added @@ -0,0 +1 @@ +Add `ddev release test-agent` command that dispatches the Linux and Windows Agent test workflows against a release branch or tag. diff --git a/ddev/changelog.d/23860.changed b/ddev/changelog.d/23860.changed new file mode 100644 index 0000000000000..887a4c848e64b --- /dev/null +++ b/ddev/changelog.d/23860.changed @@ -0,0 +1 @@ +`ddev release branch tag` now accepts `--release/-r`, `--ref`, `--rc N`, and `--yes/-y`, and prompts to confirm when run on a release branch without `--release`. Existing non-interactive callers that piped a single `y` need to pass `--yes` or one extra confirmation. diff --git a/ddev/changelog.d/24253.fixed b/ddev/changelog.d/24253.fixed new file mode 100644 index 0000000000000..ee22e145029e0 --- /dev/null +++ b/ddev/changelog.d/24253.fixed @@ -0,0 +1 @@ +Bundle per-job correlation into the CI dispatcher's BatchFinished via BatchJobResult. diff --git a/ddev/src/ddev/cli/ci/tests/messages.py b/ddev/src/ddev/cli/ci/tests/messages.py index 291c185b4c035..574b828a3a8fa 100644 --- a/ddev/src/ddev/cli/ci/tests/messages.py +++ b/ddev/src/ddev/cli/ci/tests/messages.py @@ -3,11 +3,23 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from __future__ import annotations -from dataclasses import dataclass -from typing import Literal +import re +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Literal from ddev.event_bus.orchestrator import BaseMessage +if TYPE_CHECKING: + from pathlib import Path + + from ddev.utils.github_async.models import WorkflowJob + +# Characters GitHub disallows in an artifact name (plus CR/LF). +ARTIFACT_NAME_DISALLOWED = re.compile(r'["\:<>|*?\\/\r\n]') +# Separator between the artifact name's fields. Names are matched by reconstruction, not by +# splitting, so the separator does not need to be absent from the field values. +ARTIFACT_NAME_SEPARATOR = "_" + @dataclass class BatchJob: @@ -21,6 +33,16 @@ class BatchJob: unit_tests: bool e2e_tests: bool + def artifact_name(self) -> str: + """Deterministic artifact name built from the job's target, environment, and platform. + + Pure and deterministic. Each field is sanitized to GitHub's artifact-name constraints and + joined by the separator. Uniqueness within a batch relies on those three fields being + distinct per job. + """ + fields = (self.target, self.environment, self.platform) + return ARTIFACT_NAME_SEPARATOR.join(ARTIFACT_NAME_DISALLOWED.sub("_", field) for field in fields) + @dataclass class FailedCheck: @@ -30,6 +52,57 @@ class FailedCheck: url: str +@dataclass +class BatchJobResult: + """Everything known about a single job in a finished batch, correlated by the producer. + + ``artifact_name_path`` is the single downloaded folder for the job (named after the job's + ``artifact_name``); the three ``*_artifact_name`` fields are the expected per-facet file names + inside that folder. + """ + + job: BatchJob + workflow_job: WorkflowJob | None + artifact_name_path: str | None + unit_artifact_name: str + e2e_artifact_name: str + coverage_artifact_name: str + + @staticmethod + def correlate( + job_list: list[BatchJob], + jobs: list[WorkflowJob], + artifact_dirs: dict[str, Path], + ) -> list[BatchJobResult]: + """Correlate each job's spec, its workflow-run result, and its artifact directory. + + The workflow-job join is by name (tolerant of misses). Each job's artifact folder is matched + by reconstructing its name from the job's fields (``artifact_name``) and looking it up among + the downloaded folders; the path is recorded only when it exists on disk. That single folder + holds the three per-facet files, whose names (``unit-``/``e2e-``/``coverage-`` prefixed on + the base name) are recorded for the gatherer. A job missing from the API or from disk still + yields a well-formed result. + """ + jobs_by_name = {job.name: job for job in jobs} + + results: list[BatchJobResult] = [] + for batch_job in job_list: + base = batch_job.artifact_name() + artifact_dir = artifact_dirs.get(base) + artifact_name_path = str(artifact_dir) if artifact_dir is not None and artifact_dir.exists() else None + results.append( + BatchJobResult( + job=batch_job, + workflow_job=jobs_by_name.get(batch_job.name), + artifact_name_path=artifact_name_path, + unit_artifact_name=f"unit-{base}", + e2e_artifact_name=f"e2e-{base}", + coverage_artifact_name=f"coverage-{base}", + ) + ) + return results + + @dataclass class WorkflowStatus: """Status of a single GitHub Actions workflow run.""" @@ -58,6 +131,8 @@ class BatchFinished(BaseMessage): run_id: int workflow_url: str artifacts_path: str + timed_out: bool = False + batch_jobs: list[BatchJobResult] = field(default_factory=list) @dataclass diff --git a/ddev/src/ddev/cli/ci/tests/task_test_runner.py b/ddev/src/ddev/cli/ci/tests/task_test_runner.py index 101a5b6b892e0..30dbddf3433ab 100644 --- a/ddev/src/ddev/cli/ci/tests/task_test_runner.py +++ b/ddev/src/ddev/cli/ci/tests/task_test_runner.py @@ -11,10 +11,10 @@ from pathlib import Path from typing import Any, Literal -from ddev.cli.ci.tests.messages import BatchFinished, TestBatch +from ddev.cli.ci.tests.messages import BatchFinished, BatchJob, BatchJobResult, TestBatch from ddev.event_bus.orchestrator import AsyncProcessor from ddev.utils.github_async import AsyncGitHubClient, GitHubResponse -from ddev.utils.github_async.models import WorkflowRun +from ddev.utils.github_async.models import WorkflowJob, WorkflowRun def _conclusion_to_status(conclusion: str | None) -> Literal["success", "failure", "skipped"]: @@ -63,7 +63,12 @@ async def process_message(self, message: TestBatch) -> None: log_extra: dict[str, Any] = {"batch_id": message.id} dispatch = await self._client.create_workflow_dispatch( - self._options.owner, self._options.repo, self._options.workflow_id, ref=self._options.ref, inputs=inputs + self._options.owner, + self._options.repo, + self._options.workflow_id, + ref=self._options.ref, + inputs=inputs, + return_run_details=True, ) run_id = dispatch.data.workflow_run_id log_extra["run_id"] = run_id @@ -98,15 +103,19 @@ async def process_message(self, message: TestBatch) -> None: self._logger.warning("Workflow completed with null conclusion", extra=log_extra) final_conclusion = raw or "neutral" - artifacts_path = await self._download_artifacts(run_id, log_extra) + artifact_dirs = await self._download_artifacts(run_id, log_extra) self._logger.info("Artifacts downloaded", extra=log_extra) + jobs = await self._list_jobs(run_id, log_extra) + batch_jobs = BatchJobResult.correlate(message.job_list, jobs, artifact_dirs) + finished = BatchFinished( id=message.id, status=_conclusion_to_status(raw), run_id=run_id, workflow_url=workflow_url, - artifacts_path=str(artifacts_path), + artifacts_path=str(self._options.artifacts_base_path), + batch_jobs=batch_jobs, ) finally: try: @@ -134,16 +143,38 @@ async def _poll_until_complete(self, run_id: int, log_extra: dict[str, Any]) -> self._logger.info("Workflow completed", extra=log_extra) return run + async def _list_jobs(self, run_id: int, log_extra: dict[str, Any]) -> list[WorkflowJob]: + """Fetch the workflow run's jobs; on failure log a warning and return an empty list.""" + jobs: list[WorkflowJob] = [] + try: + async for page in self._client.list_workflow_jobs(self._options.owner, self._options.repo, run_id): + jobs.extend(page.data.jobs) + except Exception: + self._logger.warning("Failed to list workflow jobs", extra=log_extra, exc_info=True) + return jobs + def _build_inputs(self, message: TestBatch) -> dict[str, str]: return { "batch_id": message.id, "checkout_sha": self._options.checkout_sha, "integrations": json.dumps(message.integrations), - "job_list": json.dumps([dataclasses.asdict(job) for job in message.job_list]), + "job_list": json.dumps([self._job_input(job) for job in message.job_list]), } - async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> Path: - run_path = self._options.artifacts_base_path / str(run_id) + @staticmethod + def _job_input(job: BatchJob) -> dict[str, Any]: + """Serialize a job for the workflow, carrying the artifact name so all its files upload under + a single folder/zip named after it (matched later via ``BatchJob.artifact_name``).""" + return {**dataclasses.asdict(job), "artifact_name": job.artifact_name()} + + async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> dict[str, Path]: + """Download the run's artifacts and return an artifact-name -> path map. + + The map keys on the GitHub artifact name (the contract a ``BatchJob`` reproduces via + ``artifact_name``), letting the producer resolve each job's directory deterministically. + """ + base_path = self._options.artifacts_base_path + artifact_dirs: dict[str, Path] = {} failures: list[tuple[int, str]] = [] try: async for page in self._client.list_workflow_run_artifacts(self._options.owner, self._options.repo, run_id): @@ -164,9 +195,10 @@ async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> P extra=log_extra, ) continue - target = run_path / f"{artifact.id}-{artifact.name}" + target = base_path / artifact.name try: await self._client.download_artifact(artifact.archive_download_url, target) + artifact_dirs[artifact.name] = target self._logger.info("Downloaded artifact %s -> %s", artifact.id, target, extra=log_extra) except Exception as exc: self._logger.warning( @@ -186,4 +218,4 @@ async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> P failures, extra=log_extra, ) - return run_path + return artifact_dirs diff --git a/ddev/src/ddev/cli/release/__init__.py b/ddev/src/ddev/cli/release/__init__.py index 22d96ab906fd6..e6b136596e87f 100644 --- a/ddev/src/ddev/cli/release/__init__.py +++ b/ddev/src/ddev/cli/release/__init__.py @@ -14,6 +14,7 @@ from ddev.cli.release.port_commit import port_commit from ddev.cli.release.show import show from ddev.cli.release.stats import stats +from ddev.cli.release.test_agent import test_agent @click.group(short_help='Manage the release of integrations') @@ -33,4 +34,5 @@ def release(): release.add_command(show) release.add_command(stats) release.add_command(tag) +release.add_command(test_agent) release.add_command(upload) diff --git a/ddev/src/ddev/cli/release/branch/tag.py b/ddev/src/ddev/cli/release/branch/tag.py index 1fa102623f521..11fc901410f58 100644 --- a/ddev/src/ddev/cli/release/branch/tag.py +++ b/ddev/src/ddev/cli/release/branch/tag.py @@ -8,23 +8,70 @@ from httpx import HTTPStatusError from packaging.version import Version +from ddev.utils.git import GitRepository + from .build_agent import BUILD_AGENT_YAML_PATH, find_build_agent_template_main_branch_matches from .create import BRANCH_NAME_REGEX if TYPE_CHECKING: from ddev.cli.application import Application + from ddev.utils.github import PullRequest UPDATE_BUILD_AGENT_YAML_WORKFLOW = 'update-build-agent-yaml.yml' # The dd-octo-sts policy grants PR-writing credentials only to this workflow on master. UPDATE_BUILD_AGENT_YAML_WORKFLOW_REF = 'master' +RELEASE_INPUT_REGEX = re.compile(r'^(\d+\.\d+)(\.x)?$') +# Sentinel used by `--rc` when the user provides the flag without a value. +# Chosen so that `--rc auto` (which users could plausibly type) is rejected by `_parse_rc_value` +# rather than silently treated as bare `--rc`. +RC_AUTO = '__rc_auto_sentinel__' @click.command @click.option( - '--final/--rc', + '--release', + '-r', + 'release', + default=None, + help=( + 'Release to tag, e.g. `7.56` or `7.56.x`. ' + 'If omitted, the current branch must be a release branch and you will be prompted to confirm.' + ), +) +@click.option( + '--ref', + 'ref', + default=None, + help=( + 'Commit (SHA, tag, or ref) to tag instead of the tip of the release branch. ' + 'Must resolve to a commit that is an ancestor of `origin/`.' + ), +) +@click.option( + '--final', + 'final', + is_flag=True, default=False, - show_default=True, - help="Whether we're tagging the final release or a release candidate (rc).", + help='Tag a final release. Mutually exclusive with `--rc`.', +) +@click.option( + '--rc', + 'rc', + is_flag=False, + flag_value=RC_AUTO, + default=None, + help=( + 'Tag a release candidate (default). Pass `--rc` alone to auto-suggest the next RC number, ' + 'or `--rc N` to pin the RC number explicitly. Mutually exclusive with `--final`.' + ), +) +@click.option( + '--yes', + '-y', + 'yes', + is_flag=True, + default=False, + help='Skip yes/no confirmation prompts and assume the user always answered yes.', ) @click.option( '--skip-open-pr-check', @@ -34,35 +81,97 @@ help='Skip checking GitHub for open PRs targeting this release branch before tagging.', ) @click.pass_obj -def tag(app, final: bool, skip_open_pr_check: bool): +def tag( + app: Application, + release: str | None, + ref: str | None, + final: bool, + rc: str | None, + yes: bool, + skip_open_pr_check: bool, +) -> None: """ - Tag the release branch either as release candidate or final release. + Tag a release branch with a release-candidate or final-release tag. + + The command always operates against `origin/` after fetching: it does not + check out or modify any local branch. The release branch is determined as follows: + + \b + - If `--release` is given (e.g. `7.56` or `7.56.x`), that branch is used. + - If `--release` is omitted and you are on a release branch, you are prompted to confirm + tagging that branch (use `--yes` to skip the prompt). + - If `--release` is omitted and you are not on a release branch, the command aborts and + asks you to pass `--release`. + + Tag-type selection: + + \b + - `--rc` (default) tags a release candidate. Pass `--rc N` to pin the RC number; pass + `--rc` alone to auto-suggest the next available RC. The command warns (but does not + abort) if the requested number leaves a gap in the RC sequence. + - `--final` tags a final release; mutually exclusive with `--rc`. + + Other options: + + \b + - `--ref ` tags an arbitrary commit instead of the branch tip. The commit must be + an ancestor of `origin/`. Useful when later commits should not be + included in the tag. + - `--yes/-y` skips all yes/no confirmations (target-branch, backward-RC, final tag). + - `--skip-open-pr-check` skips the GitHub query for open PRs targeting the branch. """ - branch_name = app.repo.git.current_branch() - release_branch = BRANCH_NAME_REGEX.match(branch_name) - if release_branch is None: - app.abort( - f'Invalid branch name: {branch_name}. Branch name must match the pattern {BRANCH_NAME_REGEX.pattern}.' - ) + if final and rc is not None: + raise click.UsageError('`--final` and `--rc` are mutually exclusive.') + is_rc = not final + pinned_rc = _parse_rc_value(rc) if is_rc else None - click.echo(app.repo.git.pull(branch_name)) - click.echo(app.repo.git.fetch_tags()) + current_branch = app.repo.git.current_branch() + target_branch = _resolve_target_branch(app, release, yes, current_branch) + + git = app.repo.git + app.display_waiting('Fetching from origin...') + git.fetch_tags() + _ensure_branch_on_origin(app, git, target_branch) + + tag_ref = _resolve_tag_ref(app, git, target_branch, ref) + effective_ref = tag_ref if tag_ref is not None else f'origin/{target_branch}' + + build_agent_yaml_needs_update = _warn_if_build_agent_yaml_stale(app, git, effective_ref) + new_tag = _compute_new_tag(app, git, target_branch, is_rc, pinned_rc, yes) + _confirm_and_push_tag(app, git, target_branch, new_tag, tag_ref, effective_ref, yes, skip_open_pr_check) - build_agent_yaml_needs_update = _build_agent_yaml_points_to_main() - # Recovery path for release branches cut before build_agent.yaml was updated. if build_agent_yaml_needs_update: - app.display_warning( - "`.gitlab/build_agent.yaml` still points to `main`.\n" - "The update PR may not have been created or merged yet.\n" - "Will trigger the workflow after the tag is pushed.\n" - "Tagging will continue." - ) + _trigger_build_agent_yaml_update_workflow(app, target_branch) + + +def _warn_if_build_agent_yaml_stale(app: Application, git: GitRepository, ref: str) -> bool: + """Warn (and return True) if `.gitlab/build_agent.yaml` at `ref` still points to `main`.""" + if not _build_agent_yaml_points_to_main(git, ref): + return False + # Recovery path for release branches cut before build_agent.yaml was updated. + app.display_warning( + "`.gitlab/build_agent.yaml` still points to `main`.\n" + "The update PR may not have been created or merged yet.\n" + "Will trigger the workflow after the tag is pushed.\n" + "Tagging will continue." + ) + return True + - major_minor_version = branch_name.replace('.x', '') +def _compute_new_tag( + app: Application, + git: GitRepository, + target_branch: str, + is_rc: bool, + pinned_rc: int | None, + yes: bool, +) -> str: + """Compute the new tag string, validating RC bounds, existence, gaps, and backward moves.""" + major_minor_version = target_branch.replace('.x', '') this_release_tags = sorted( ( Version(t) - for t in set(app.repo.git.tags(glob_pattern=major_minor_version + '.*')) + for t in set(git.tags(glob_pattern=major_minor_version + '.*')) # We take 'major.minor.x' as the branch name pattern and replace 'x' with 'patch-rc.number'. # We make the RC component optional so that final tags also match our filter. if re.match(BRANCH_NAME_REGEX.pattern.replace('x', r'\d+(\-rc\.\d+)?'), t) @@ -73,61 +182,181 @@ def tag(app, final: bool, skip_open_pr_check: bool): last_tag_was_final = last_rc is None new_patch = last_patch + 1 if last_tag_was_final else last_patch new_tag = f'{major_minor_version}.{new_patch}' - if not final: - new_rc_guess = 1 if last_tag_was_final else last_rc + 1 + if not is_rc: + return new_tag + + new_rc_guess = 1 if last_tag_was_final else last_rc + 1 + if pinned_rc is not None: + next_rc = pinned_rc + elif yes: + next_rc = new_rc_guess + click.echo(f'Using auto-suggested RC number: {next_rc}') + else: next_rc = click.prompt( - 'What RC number are we tagging? (hit ENTER to accept suggestion)', type=int, default=new_rc_guess + 'What RC number are we tagging? (hit ENTER to accept suggestion)', + type=int, + default=new_rc_guess, ) - if next_rc < 1: - app.abort('RC number must be at least 1.') - new_tag += f'-rc.{next_rc}' - if Version(new_tag) in this_release_tags: - app.abort(f'Tag {new_tag} already exists. Switch to git to overwrite it.') - if not last_tag_was_final and next_rc < last_rc: - click.secho('!!! WARNING !!!') - if not click.confirm( - f'The latest RC is {last_rc}. ' - 'You are about to go back in time by creating an RC with a number less than that. Are you sure? [y/N]' - ): - app.abort('Did not get confirmation, aborting. Did not create or push the tag.') - - prs = [] - if not skip_open_pr_check: - httpx_logger = logging.getLogger('httpx') - previous_level = httpx_logger.level - httpx_logger.setLevel(logging.WARNING) - try: - prs = app.github.list_open_pull_requests_targeting_base(branch_name) - except Exception as e: - click.secho(f'Warning: unable to check for open PRs: {e}', fg='yellow') - finally: - httpx_logger.setLevel(previous_level) - - if prs: - click.secho('!!! WARNING !!!', fg='yellow') - click.secho(f'Found {len(prs)} open PR(s) targeting base branch {branch_name}:', fg='yellow') - for pr in prs[:20]: - click.secho(f' - #{pr.number} {pr.title} ({pr.html_url})', fg='yellow') - if len(prs) > 20: - click.secho(f' ... and {len(prs) - 20} more', fg='yellow') + if next_rc < 1: + # Only reachable for the interactive prompt — `--rc N` is validated up-front in + # `_parse_rc_value`. + app.abort('RC number must be at least 1.') + new_tag += f'-rc.{next_rc}' + if Version(new_tag) in this_release_tags: + app.abort(f'Tag {new_tag} already exists. Switch to git to overwrite it.') + _warn_on_rc_gap(app, next_rc, new_rc_guess, target_branch) + if not last_tag_was_final and next_rc < last_rc: + click.secho('!!! WARNING !!!') + if not _confirm( + yes, + f'The latest RC is {last_rc}. ' + 'You are about to go back in time by creating an RC with a number less than that. Are you sure? ' + '[y/N]', + ): + app.abort('Did not get confirmation, aborting. Did not create or push the tag.') + return new_tag + + +def _confirm_and_push_tag( + app: Application, + git: GitRepository, + target_branch: str, + new_tag: str, + tag_ref: str | None, + effective_ref: str, + yes: bool, + skip_open_pr_check: bool, +) -> None: + """Surface open-PR warnings, get final confirmation, then create and push the tag. + + `tag_ref` is the user-resolved commit when `--ref` was explicitly provided (used for the + "at ?" prompt suffix). `effective_ref` is what `git tag` actually keys off — either + that same commit, or `origin/` when `--ref` was not supplied. + """ + prs = _check_open_prs(app, target_branch, skip_open_pr_check) prompt = f'Create and push this tag: {new_tag}?' if prs: - prompt = f'Open PRs found targeting {branch_name}. Create and push this tag anyway: {new_tag}?' + prompt = f'Open PRs found targeting {target_branch}. Create and push this tag anyway: {new_tag}?' + if tag_ref is not None: + prompt = prompt.rstrip('?') + f' at {tag_ref}?' - if not click.confirm(prompt): + if not _confirm(yes, prompt): app.abort('Did not get confirmation, aborting. Did not create or push the tag.') - click.echo(app.repo.git.tag(new_tag, message=new_tag)) - click.echo(app.repo.git.push(new_tag)) - if build_agent_yaml_needs_update: - _trigger_build_agent_yaml_update_workflow(app, branch_name) + try: + click.echo(git.tag(new_tag, message=new_tag, ref=effective_ref)) + click.echo(git.push(new_tag)) + except OSError as e: + app.abort(f'Failed to create or push tag `{new_tag}`: {e}') + + +def _parse_rc_value(rc: str | None) -> int | None: + if rc is None or rc == RC_AUTO: + return None + try: + value = int(rc) + except ValueError as e: + raise click.UsageError(f'`--rc` value must be a positive integer, got `{rc}`.') from e + if value < 1: + raise click.UsageError(f'`--rc` value must be a positive integer, got `{rc}`.') + return value + + +def _resolve_target_branch(app: Application, release: str | None, yes: bool, current_branch: str) -> str: + if release is not None: + match = RELEASE_INPUT_REGEX.match(release) + if match is None: + raise click.UsageError(f'Invalid `--release` value: `{release}`. Must look like `7.56` or `7.56.x`.') + return f'{match.group(1)}.x' + + if BRANCH_NAME_REGEX.match(current_branch) is None: + app.abort( + f'Current branch `{current_branch}` is not a release branch. ' + f'Pass `--release ` to choose the release branch to tag.' + ) + + if not _confirm(yes, f'You are on release branch `{current_branch}`. Tag this release?'): + app.abort('Did not get confirmation, aborting. Did not create or push the tag.') + return current_branch + +def _ensure_branch_on_origin(app: Application, git: GitRepository, branch: str) -> None: + try: + output = git.capture('ls-remote', '--heads', 'origin', branch) + except OSError as e: + app.abort(f'Failed to query `origin` for branch `{branch}`: {e}') + if not any(line.strip() for line in output.splitlines()): + app.abort(f'Release branch `{branch}` does not exist on `origin`.') + + +def _resolve_tag_ref(app: Application, git: GitRepository, target_branch: str, ref: str | None) -> str | None: + if ref is None: + return None + commit_sha: str | None = None + try: + commit_sha = git.capture('rev-parse', '--verify', f'{ref}^{{commit}}').strip() + git.capture('merge-base', '--is-ancestor', commit_sha, f'origin/{target_branch}') + except OSError as e: + if commit_sha is None: + app.abort(f'`--ref` value `{ref}` does not resolve to a commit: {e}') + app.abort( + f'`--ref` value `{ref}` (resolved to `{commit_sha}`) is not an ancestor of ' + f'`origin/{target_branch}` (or `git merge-base` itself failed): {e}' + ) + return commit_sha + + +def _warn_on_rc_gap(app: Application, next_rc: int, expected_rc: int, branch: str) -> None: + if next_rc <= expected_rc: + return + missing = list(range(expected_rc, next_rc)) + missing_list = ', '.join(str(n) for n in missing) + suggested_rc = missing[0] + app.display_warning( + f'Requested RC {next_rc} skips ahead of the next available RC ({expected_rc}). ' + f'Missing RC number(s): {missing_list}.\n' + f'To fill the gap later, run: ' + f'ddev release branch tag --release {branch} --rc {suggested_rc} --ref ' + ) -def _build_agent_yaml_points_to_main() -> bool: - from ddev.utils.fs import Path - path = Path(BUILD_AGENT_YAML_PATH) - return path.exists() and bool(find_build_agent_template_main_branch_matches(path.read_text())) +def _check_open_prs(app: Application, target_branch: str, skip_open_pr_check: bool) -> list[PullRequest]: + if skip_open_pr_check: + return [] + httpx_logger = logging.getLogger('httpx') + previous_level = httpx_logger.level + httpx_logger.setLevel(logging.WARNING) + try: + prs = app.github.list_open_pull_requests_targeting_base(target_branch) + except Exception as e: + click.secho(f'Warning: unable to check for open PRs: {e}', fg='yellow') + return [] + finally: + httpx_logger.setLevel(previous_level) + + if prs: + click.secho('!!! WARNING !!!', fg='yellow') + click.secho(f'Found {len(prs)} open PR(s) targeting base branch {target_branch}:', fg='yellow') + for pr in prs[:20]: + click.secho(f' - #{pr.number} {pr.title} ({pr.html_url})', fg='yellow') + if len(prs) > 20: + click.secho(f' ... and {len(prs) - 20} more', fg='yellow') + return prs + + +def _confirm(yes: bool, prompt: str) -> bool: + if yes: + click.echo(f'{prompt} [auto-yes]') + return True + return click.confirm(prompt) + + +def _build_agent_yaml_points_to_main(git: GitRepository, ref: str) -> bool: + try: + content = git.show_file(BUILD_AGENT_YAML_PATH, ref) + except OSError: + return False + return bool(find_build_agent_template_main_branch_matches(content)) def _trigger_build_agent_yaml_update_workflow(app: Application, branch_name: str) -> None: diff --git a/ddev/src/ddev/cli/release/port_commit_workflow.py b/ddev/src/ddev/cli/release/port_commit_workflow.py index 472f1692ecc17..b64ac145e7f49 100644 --- a/ddev/src/ddev/cli/release/port_commit_workflow.py +++ b/ddev/src/ddev/cli/release/port_commit_workflow.py @@ -695,7 +695,7 @@ def resolve_port_plan( dry_run=dry_run, ) - app.output(_format_plan_summary(plan), stderr=True) + app.output(_format_plan_summary(app, plan), stderr=True) if not dry_run and not click.confirm('Continue?'): app.abort('Did not get confirmation, aborting.') @@ -705,7 +705,6 @@ def resolve_port_plan( def display_completion_summary(app: Application, plan: PortPlan, *, pr_url: str | None) -> None: """Print a panel summarising the port outcome.""" - text = Text() rows: list[tuple[str, str]] = [ ('Commit', f'{plan.full_sha[:10]} - {plan.clean_subject}'), ('Target', plan.target_branch), @@ -714,17 +713,13 @@ def display_completion_summary(app: Application, plan: PortPlan, *, pr_url: str if pr_url is not None: rows.append(('Pull request', pr_url)) - label_width = max(len(label) for label, _ in rows) - for i, (label, value) in enumerate(rows): - if i: - text.append('\n') - text.append(f'{label}:'.ljust(label_width + 2), style='bold') - text.append(value) - - app.output(Panel(text, title='Backport completed', title_align='left', border_style='cyan'), stderr=True) + app.output( + Panel(app.labeled_lines(rows), title='Backport completed', title_align='left', border_style='cyan'), + stderr=True, + ) -def _format_plan_summary(plan: PortPlan) -> Text: +def _format_plan_summary(app: Application, plan: PortPlan) -> Text: text = Text() text.append('Configuration:', style='bold') @@ -739,10 +734,8 @@ def _format_plan_summary(plan: PortPlan) -> Text: ('Verify commit', str(plan.verify)), ('Dry run', str(plan.dry_run)), ] - for label, value in rows: - text.append('\n ') - text.append(f'{label}:', style='bold') - text.append(f' {value}') + text.append('\n') + text.append_text(app.labeled_lines(rows, indent=' ', align=False)) return text diff --git a/ddev/src/ddev/cli/release/test_agent/__init__.py b/ddev/src/ddev/cli/release/test_agent/__init__.py new file mode 100644 index 0000000000000..540b2859cd45b --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/__init__.py @@ -0,0 +1,170 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""`ddev release test-agent` — dispatch the Linux + Windows Agent test workflows. + +The orchestration body lives here so the file reads top-to-bottom as the command's story. +Each step delegates to a sibling module (`validation`, `images`, `dispatch`), and every +helper module is imported lazily inside the function body so `ddev --help` only pays for +`click` from this package. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import click + +if TYPE_CHECKING: + from collections.abc import Sequence + + from ddev.cli.application import Application + from ddev.cli.release.test_agent.dispatch import DispatchedWorkflow + + +@click.command('test-agent', short_help='Dispatch the Agent test workflows against a branch or tag') +@click.option('--branch', help='Release branch to test, e.g. `7.80.x`.') +@click.option('--tag', help='Agent release tag to test, e.g. `7.80.0-rc.1` or `7.80.0`.') +@click.option('--dry-run', is_flag=True, help='Resolve images and print the plan without dispatching.') +@click.option('--monitor', is_flag=True, help='Monitor dispatched workflow jobs until both workflows finish.') +@click.option( + '--poll-interval', + type=click.FloatRange(min=1.0), + default=10.0, + show_default=True, + help='Seconds between workflow monitor polls.', +) +@click.option('--yes', '-y', is_flag=True, help='Skip the interactive confirmation prompt.') +@click.pass_obj +def test_agent( + app: Application, + branch: str | None, + tag: str | None, + dry_run: bool, + monitor: bool, + poll_interval: float, + yes: bool, +) -> None: + """Trigger `test-agent.yml` and `test-agent-windows.yml` against the resolved Agent image. + + Exactly one of `--branch` or `--tag` must be provided. When `--branch` is given, the latest + `MAJ.MIN.PATCH-rc.N` published to `registry.datadoghq.com/agent` is used as the Agent image. + When `--tag` is given, that exact tag is used. Linux and Windows (servercore) variants are + both validated against the registry before either workflow is dispatched. + """ + import logging + + from ddev.cli.release.test_agent.dispatch import dispatch_both + from ddev.cli.release.test_agent.images import build_image_refs, resolve_version, validate_images_exist + from ddev.cli.release.test_agent.validation import ( + Branch, + fetch_target, + validate_input, + verify_workflows_present_on_ref, + ) + + logging.getLogger('httpx').setLevel(logging.WARNING) + + target = validate_input(app, branch, tag) + + if not app.config.github.token: + app.abort('GitHub token required. Set `github.token` via `ddev config set github.token `.') + + fetch_target(app, target) + verify_workflows_present_on_ref(app, target) + app.display_info('') + + version = resolve_version(app, target) + app.display_info('') + + validate_images_exist(app, version) + linux_image, windows_image = build_image_refs(version) + app.display_info('') + + # GitHub's workflow_dispatch API expects every value in `inputs` to be a string, even for + # `type: boolean` workflow inputs — booleans are parsed from the lowercase string form. + # `test-py2='false'` is sent to both dispatches by design: this command is forward-looking + # and tests Python 3 only, even on Windows (where `test-agent-windows.yml` defaults + # `test-py2` to `true` for legacy reasons). + inputs: dict[str, str] = { + 'test-py3': 'true', + 'test-py2': 'false', + 'agent-image': linux_image, + 'agent-image-windows': windows_image, + } + is_branch = isinstance(target, Branch) + _print_plan(app, ref=target.name, version=version, is_branch=is_branch, inputs=inputs) + app.display_info('') + + if dry_run: + app.display_info('Dry run — no workflows dispatched.') + return + + if not yes and not click.confirm('Dispatch both workflows?', default=False): + app.abort('Aborted by user.') + + try: + workflows = dispatch_both(app.config.github.token, ref=target.name, inputs=inputs) + except RuntimeError as e: + app.abort(str(e)) + else: + if not monitor: + app.display_info('') + _print_result(app, workflows=workflows) + + if monitor: + from ddev.cli.release.test_agent.monitoring import monitor_dispatched_workflows + + try: + monitor_dispatched_workflows( + app, + app.config.github.token, + ref=target.name, + workflows=workflows, + poll_interval=poll_interval, + ) + except Exception as e: + urls = ', '.join(w.html_url for w in workflows) + app.abort(f'Failed to monitor workflows: {e}. Runs are still in flight: {urls}') + + +def _print_plan( + app: Application, + *, + ref: str, + version: str, + is_branch: bool, + inputs: dict[str, str], +) -> None: + """Render the resolved dispatch plan via the stderr-bound `display_info` channel. + + All progress lines (`display_waiting`/`display_success`) default to stderr; keeping the + plan on the same channel means piping the command into a file leaves stdout clean and + keeps the whole pre-dispatch narrative coherent on stderr. + """ + from rich.panel import Panel + + from ddev.cli.release.test_agent.validation import WORKFLOW_LINUX, WORKFLOW_WINDOWS + + rows: list[tuple[str, str]] = [ + ('Workflows', f'{WORKFLOW_LINUX}, {WORKFLOW_WINDOWS}'), + ('Ref', ref), + ] + if is_branch: + rows.append(('Resolved RC', version)) + rows.extend(inputs.items()) + + app.output( + Panel(app.labeled_lines(rows), title='Dispatch plan', title_align='left', border_style='cyan'), stderr=True + ) + + +def _print_result(app: Application, *, workflows: Sequence[DispatchedWorkflow]) -> None: + """Render the two run URLs in a rich Panel, matching the look of `ddev release port-commit`.""" + from rich.panel import Panel + + rows = [(workflow.label, workflow.html_url) for workflow in workflows] + app.output( + Panel(app.labeled_lines(rows), title='Workflows dispatched', title_align='left', border_style='cyan'), + stderr=True, + ) diff --git a/ddev/src/ddev/cli/release/test_agent/dispatch.py b/ddev/src/ddev/cli/release/test_agent/dispatch.py new file mode 100644 index 0000000000000..3c23a343af12f --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/dispatch.py @@ -0,0 +1,104 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Parallel `workflow_dispatch` orchestration for `ddev release test-agent`.""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from ddev.cli.release.test_agent.validation import REPO_NAME, REPO_OWNER, WORKFLOW_LINUX, WORKFLOW_WINDOWS + +if TYPE_CHECKING: + from ddev.utils.github_async import GitHubResponse + from ddev.utils.github_async.models import WorkflowDispatchResult + +type DispatchOutcome = GitHubResponse[WorkflowDispatchResult] | BaseException + + +@dataclass(frozen=True) +class DispatchedWorkflow: + """A workflow run created by `ddev release test-agent`.""" + + label: str + workflow_id: str + run_id: int + html_url: str + + +def dispatch_both(token: str, *, ref: str, inputs: dict[str, str]) -> tuple[DispatchedWorkflow, DispatchedWorkflow]: + """Dispatch both workflows in parallel via the async GitHub client.""" + from ddev.utils.github_async import async_github_client + + async def run_dispatches() -> tuple[DispatchOutcome, DispatchOutcome]: + async with async_github_client(token=token) as client: + return await asyncio.gather( + client.create_workflow_dispatch( + owner=REPO_OWNER, + repo=REPO_NAME, + workflow_id=WORKFLOW_LINUX, + ref=ref, + inputs=inputs, + return_run_details=True, + ), + client.create_workflow_dispatch( + owner=REPO_OWNER, + repo=REPO_NAME, + workflow_id=WORKFLOW_WINDOWS, + ref=ref, + inputs=inputs, + return_run_details=True, + ), + return_exceptions=True, + ) + + return extract_dispatched_workflows(asyncio.run(run_dispatches())) + + +def extract_dispatched_workflows( + results: tuple[DispatchOutcome, DispatchOutcome], +) -> tuple[DispatchedWorkflow, DispatchedWorkflow]: + """Pull workflow runs out of two gather results, raising on any exception with a partial-success hint. + + `asyncio.gather(return_exceptions=True)` captures `CancelledError`/`KeyboardInterrupt` + (`BaseException` subclasses, not `Exception`) into its result list. Re-raise those first + so flow-control exceptions propagate cleanly instead of being wrapped in `RuntimeError`. + """ + linux_result, windows_result = results + + for result in (linux_result, windows_result): + if isinstance(result, BaseException) and not isinstance(result, Exception): + raise result + + if isinstance(linux_result, BaseException): + if isinstance(windows_result, BaseException): + raise RuntimeError( + f'Both dispatches failed. Linux: {linux_result!r}. Windows: {windows_result!r}.' + ) from linux_result + sibling = windows_result.data.html_url + raise RuntimeError( + f'Linux dispatch failed: {linux_result}. The other workflow was dispatched at {sibling}.' + ) from linux_result + + if isinstance(windows_result, BaseException): + sibling = linux_result.data.html_url + raise RuntimeError( + f'Windows dispatch failed: {windows_result}. The other workflow was dispatched at {sibling}.' + ) from windows_result + + return ( + DispatchedWorkflow( + label='Linux', + workflow_id=WORKFLOW_LINUX, + run_id=linux_result.data.workflow_run_id, + html_url=linux_result.data.html_url, + ), + DispatchedWorkflow( + label='Windows', + workflow_id=WORKFLOW_WINDOWS, + run_id=windows_result.data.workflow_run_id, + html_url=windows_result.data.html_url, + ), + ) diff --git a/ddev/src/ddev/cli/release/test_agent/images.py b/ddev/src/ddev/cli/release/test_agent/images.py new file mode 100644 index 0000000000000..7518b4e43fe19 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/images.py @@ -0,0 +1,72 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Resolve and validate the Agent image refs that `ddev release test-agent` dispatches against.""" + +from __future__ import annotations + +import contextlib +from typing import TYPE_CHECKING + +import httpx + +from ddev.cli.release.test_agent import registry +from ddev.cli.release.test_agent.validation import Branch, ReleaseTarget + +if TYPE_CHECKING: + from collections.abc import Iterator + + from ddev.cli.application import Application + + +@contextlib.contextmanager +def registry_errors(app: Application, scope: str) -> Iterator[None]: + """Translate any `httpx.HTTPError` raised inside the block into a clean `app.abort` message. + + `scope` is interpolated into the abort text — e.g. `'tags'` for the tag listing or an + image ref like `'registry.datadoghq.com/agent:7.80.0-rc.3'` for a manifest probe. + """ + try: + yield + except httpx.HTTPError as e: + app.abort(f'Failed to query registry.datadoghq.com for {scope}: {e}') + + +def resolve_version(app: Application, target: ReleaseTarget) -> str: + """Pick the Agent image tag to test: the explicit tag, or the highest published RC for a branch.""" + if not isinstance(target, Branch): + return target.name + + major_str, minor_str, _ = target.name.split('.') + major, minor = int(major_str), int(minor_str) + + app.display_waiting(f'Looking up latest {major}.{minor}.*-rc.* in registry.datadoghq.com...') + with registry_errors(app, 'tags'): + tags = registry.list_agent_rc_tags(major, minor) + if not tags: + app.abort( + f'No `{major}.{minor}.*-rc.*` tags found in registry.datadoghq.com/agent. ' + 'Pass --tag explicitly once the first RC is published.' + ) + latest = tags[-1] + app.display_success(f'Latest RC: {latest}') + return latest + + +def build_image_refs(version: str) -> tuple[str, str]: + """Return the (linux, windows) image refs for the given Agent version.""" + base = f'registry.datadoghq.com/agent:{version}' + return base, f'{base}-servercore' + + +def validate_images_exist(app: Application, version: str) -> None: + """Check that both the Linux (``) and Windows (`-servercore`) manifests are published.""" + for tag in (version, f'{version}-servercore'): + image = f'registry.datadoghq.com/agent:{tag}' + app.display_waiting(f'Checking `{image}`...') + with registry_errors(app, f'`{image}`'): + exists = registry.manifest_exists(tag) + if not exists: + app.abort( + f'Image `{image}` not found in registry.datadoghq.com. Confirm the Agent release has been published.' + ) diff --git a/ddev/src/ddev/cli/release/test_agent/monitoring.py b/ddev/src/ddev/cli/release/test_agent/monitoring.py new file mode 100644 index 0000000000000..1677e646a0aa7 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/monitoring.py @@ -0,0 +1,299 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Monitor dispatched Agent test workflow runs.""" + +from __future__ import annotations + +import asyncio +from collections.abc import Awaitable, Callable, Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import httpx +from rich.console import Group +from rich.live import Live +from rich.panel import Panel +from rich.spinner import Spinner +from rich.text import Text +from rich.tree import Tree + +from ddev.cli.release.test_agent.validation import REPO_NAME, REPO_OWNER +from ddev.utils.github_async import async_github_client + +if TYPE_CHECKING: + from ddev.cli.release.test_agent.dispatch import DispatchedWorkflow + from ddev.cli.terminal import Terminal + from ddev.utils.github_async.client import AsyncGitHubClient + +SUCCESS_MARK = '\u2713' +ERROR_MARK = '\u2717' +RUNNING_SPINNER = 'dots' + + +@dataclass(frozen=True) +class JobState: + """Current state for one GitHub Actions job.""" + + name: str + status: str + conclusion: str | None + html_url: str | None + + @property + def is_failure(self) -> bool: + return self.status == 'completed' and self.conclusion not in {'success', 'skipped'} + + +@dataclass(frozen=True) +class WorkflowState: + """Current state for one GitHub Actions workflow run.""" + + label: str + run_id: int + status: str + conclusion: str | None + html_url: str + jobs: list[JobState] + + @property + def is_complete(self) -> bool: + return self.status == 'completed' + + @property + def failed_jobs(self) -> list[JobState]: + return [job for job in self.jobs if job.is_failure] + + +@dataclass(frozen=True) +class MonitorState: + """Current state for all monitored workflow runs.""" + + workflows: list[WorkflowState] + + @property + def is_complete(self) -> bool: + return all(workflow.is_complete for workflow in self.workflows) + + +@dataclass(frozen=True) +class JobCounts: + """Aggregate job counts for one workflow.""" + + waiting: int = 0 + running: int = 0 + succeeded: int = 0 + failed: int = 0 + skipped: int = 0 + + +async def collect_monitor_state( + client: AsyncGitHubClient, + workflows: Sequence[DispatchedWorkflow], +) -> MonitorState: + """Fetch current workflow and job state for every dispatched workflow.""" + states = await asyncio.gather(*(collect_workflow_state(client, workflow) for workflow in workflows)) + return MonitorState(list(states)) + + +async def collect_workflow_state(client: AsyncGitHubClient, workflow: DispatchedWorkflow) -> WorkflowState: + """Fetch current workflow and job state for one dispatched workflow.""" + run_response = await client.get_workflow_run(REPO_OWNER, REPO_NAME, workflow.run_id) + run = run_response.data + jobs: list[JobState] = [] + + async for page in client.list_workflow_jobs(REPO_OWNER, REPO_NAME, workflow.run_id, per_page=100): + jobs.extend( + JobState( + name=job.name, + status=job.status, + conclusion=job.conclusion, + html_url=job.html_url, + ) + for job in page.data.jobs + ) + + return WorkflowState( + label=workflow.label, + run_id=workflow.run_id, + status=run.status, + conclusion=run.conclusion, + html_url=run.html_url or workflow.html_url, + jobs=jobs, + ) + + +def monitor_dispatched_workflows( + terminal: Terminal, + token: str, + *, + ref: str, + workflows: Sequence[DispatchedWorkflow], + poll_interval: float = 10.0, +) -> None: + """Monitor dispatched workflows until GitHub marks every run completed.""" + + async def run() -> None: + async with async_github_client(token=token) as client: + await monitor_workflows(terminal, client, ref=ref, workflows=workflows, poll_interval=poll_interval) + + asyncio.run(run()) + + +async def monitor_workflows( + terminal: Terminal, + client: AsyncGitHubClient, + *, + ref: str, + workflows: Sequence[DispatchedWorkflow], + poll_interval: float = 10.0, + sleep: Callable[[float], Awaitable[None]] = asyncio.sleep, +) -> None: + """Render a live workflow monitor until every workflow run completes.""" + state = MonitorState( + [ + WorkflowState(workflow.label, workflow.run_id, 'queued', None, workflow.html_url, []) + for workflow in workflows + ] + ) + + async def poll() -> MonitorState: + nonlocal state + try: + state = await collect_monitor_state(client, workflows) + except httpx.HTTPError: + pass + return state + + if terminal.interactive and terminal.console.is_terminal: + original_stderr = terminal.console.stderr + terminal.console.stderr = True + try: + with Live( + render_monitor_panel(terminal, ref=ref, poll_interval=poll_interval, state=state), + console=terminal.console, + auto_refresh=True, + refresh_per_second=10, + redirect_stderr=False, + redirect_stdout=False, + transient=True, + vertical_overflow='crop', + ) as live: + while True: + state = await poll() + live.update( + render_monitor_panel(terminal, ref=ref, poll_interval=poll_interval, state=state), + refresh=True, + ) + if state.is_complete: + break + await sleep(poll_interval) + finally: + terminal.console.stderr = original_stderr + else: + while True: + state = await poll() + if state.is_complete: + break + await sleep(poll_interval) + + terminal.output(render_monitor_panel(terminal, ref=ref, poll_interval=poll_interval, state=state), stderr=True) + + +def render_monitor_panel( + terminal: Terminal, + *, + ref: str, + poll_interval: float, + state: MonitorState, +) -> Panel: + """Render workflow and job state as a rich panel.""" + header = terminal.labeled_lines( + [ + ('Ref', ref), + ('Polling', f'every {poll_interval:g}s'), + ] + ) + tree = Tree('Workflows', style='bold') + for workflow in state.workflows: + workflow_branch = tree.add(_workflow_label(workflow)) + if workflow.jobs: + workflow_branch.add(_job_summary_label(workflow.jobs)) + if failed_jobs := workflow.failed_jobs: + failures_branch = workflow_branch.add(Text('Failures', style='bold red')) + for job in sorted(failed_jobs, key=lambda failed_job: failed_job.name.lower()): + failures_branch.add(_failed_job_label(job)) + elif workflow.is_complete: + workflow_branch.add(Text('No jobs reported', style='bold yellow')) + else: + workflow_branch.add(Spinner(RUNNING_SPINNER, text='Waiting for jobs', style='bold magenta')) + + return Panel( + Group(header, Text(''), tree), + title='Agent test workflow monitor', + title_align='left', + border_style='cyan', + ) + + +def _workflow_label(workflow: WorkflowState) -> Text | Spinner: + if not workflow.is_complete: + return Spinner(RUNNING_SPINNER, text=f'{workflow.label} {workflow.html_url}', style='bold magenta') + + success = workflow.conclusion == 'success' + mark = SUCCESS_MARK if success else ERROR_MARK + style = 'bold cyan' if success else 'bold red' + label = Text(f'{mark} ', style=style) + label.append(workflow.label, style=f'{style} link {workflow.html_url}') + label.append(' ') + label.append(workflow.html_url, style=f'link {workflow.html_url}') + return label + + +def _job_summary_label(jobs: Sequence[JobState]) -> Text: + counts = _count_jobs(jobs) + label = Text() + label.append('Waiting: ', style='bold') + label.append(str(counts.waiting), style='bold yellow') + label.append(' Running: ', style='bold') + label.append(str(counts.running), style='bold magenta') + label.append(' Succeeded: ', style='bold') + label.append(str(counts.succeeded), style='bold cyan') + label.append(' Failed: ', style='bold') + label.append(str(counts.failed), style='bold red' if counts.failed else 'bold') + label.append(' Skipped: ', style='bold') + label.append(str(counts.skipped), style='dim' if counts.skipped else 'bold') + return label + + +def _failed_job_label(job: JobState) -> Text: + label = Text(f'{ERROR_MARK} ', style='bold red') + if job.html_url is None: + label.append(job.name, style='bold red') + return label + + label.append(job.name, style=f'bold red link {job.html_url}') + label.append(' ') + label.append(job.html_url, style=f'red link {job.html_url}') + return label + + +def _count_jobs(jobs: Sequence[JobState]) -> JobCounts: + waiting = 0 + running = 0 + succeeded = 0 + failed = 0 + skipped = 0 + for job in jobs: + if job.status != 'completed': + if job.status == 'in_progress': + running += 1 + else: + waiting += 1 + elif job.conclusion == 'success': + succeeded += 1 + elif job.conclusion == 'skipped': + skipped += 1 + else: + failed += 1 + return JobCounts(waiting=waiting, running=running, succeeded=succeeded, failed=failed, skipped=skipped) diff --git a/ddev/src/ddev/cli/release/test_agent/registry.py b/ddev/src/ddev/cli/release/test_agent/registry.py new file mode 100644 index 0000000000000..3520e01c9c1b8 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/registry.py @@ -0,0 +1,40 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Agent-specific wrappers around `ddev.utils.docker_registry`. + +The Datadog Agent image is served by `registry.datadoghq.com/agent`. This +module fixes that repository name and exposes the only agent-specific +operation we need today: filtering the published tag list down to the +`MAJ.MIN.PATCH-rc.N` RCs for a given release line. +""" + +from __future__ import annotations + +import re + +from packaging.version import InvalidVersion, Version + +from ddev.utils import docker_registry + +AGENT_REPOSITORY = 'agent' + + +def manifest_exists(tag: str, *, timeout: float = 10.0) -> bool: + """Return True if `registry.datadoghq.com/agent:` resolves to a manifest, False on 404.""" + return docker_registry.manifest_exists(AGENT_REPOSITORY, tag, timeout=timeout) + + +def list_agent_rc_tags(major: int, minor: int, *, timeout: float = 10.0) -> list[str]: + """Return all `..-rc.N` tags published to the Agent registry, sorted ascending by version.""" + pattern = re.compile(rf'^{major}\.{minor}\.\d+-rc\.\d+$') + raw_tags = docker_registry.list_tags(AGENT_REPOSITORY, timeout=timeout) + matches: list[tuple[Version, str]] = [] + for tag in raw_tags: + if not pattern.match(tag): + continue + try: + matches.append((Version(tag), tag)) + except InvalidVersion: + continue + return [tag for _, tag in sorted(matches, key=lambda pair: pair[0])] diff --git a/ddev/src/ddev/cli/release/test_agent/validation.py b/ddev/src/ddev/cli/release/test_agent/validation.py new file mode 100644 index 0000000000000..8412074565a77 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/validation.py @@ -0,0 +1,154 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Input validation, ref fetching, and workflow-file checks for `ddev release test-agent`. + +The user picks either a release branch or a release tag. Downstream code needs to treat +those two paths differently (the fetch refspec, the local ref name for `git show`, the +version-resolution logic), but the type system has to know which one is in hand. Modelling +the choice as a sum type (`Branch | Tag`) — produced by `validate_input` after the +user-facing checks — keeps the branch/tag distinction explicit at every call site and +eliminates the type-narrowing `assert`s that would otherwise be needed. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ddev.cli.application import Application + +BRANCH_PATTERN = r'^\d+\.\d+\.x$' +TAG_PATTERN = r'^\d+\.\d+\.\d+(-rc\.\d+)?$' + +# Hard-coded: the two test workflows only live on DataDog/integrations-core. Forks and other +# integrations repos (extras, marketplace) have nothing to dispatch even if the branch/tag exists, +# so deferring either component to repo metadata would just hide misconfiguration. If we ever +# ship the workflows elsewhere, plumb the target through here. +REPO_OWNER = 'DataDog' +REPO_NAME = 'integrations-core' + +# The two workflow filenames the command targets. Defined here (the layer that runs first) +# rather than in `dispatch.py`, so the import graph flows validation -> dispatch and validation +# does not pull in dispatch's async/HTTP machinery at module-load time. +WORKFLOW_LINUX = 'test-agent.yml' +WORKFLOW_WINDOWS = 'test-agent-windows.yml' + +WORKFLOW_FILES = [ + f'.github/workflows/{WORKFLOW_LINUX}', + f'.github/workflows/{WORKFLOW_WINDOWS}', +] + +# git error fragments that mean "ref exists but file is not in that tree" — i.e. the workflow +# really isn't on this branch/tag, as opposed to git failing for some other reason. +GIT_FILE_MISSING_FRAGMENTS = ( + 'exists on disk', + 'does not exist', + 'no such path', +) + +# git error fragments emitted when `git fetch` cannot resolve the named ref on origin. +GIT_FETCH_MISSING_FRAGMENTS = ( + "couldn't find remote ref", + 'no such ref', +) + + +@dataclass(frozen=True) +class Branch: + """A validated release branch the user selected via `--branch`.""" + + name: str + + +@dataclass(frozen=True) +class Tag: + """A validated release tag the user selected via `--tag` (with any leading `v` stripped).""" + + name: str + + +ReleaseTarget = Branch | Tag + + +def validate_input(app: Application, branch: str | None, tag: str | None) -> ReleaseTarget: + """Normalize and validate `--branch`/`--tag`, returning a typed `Branch` or `Tag`. + + Click can't enforce the mutually-exclusive constraint by itself, so this is the single + point that does. Every downstream helper takes the resulting `ReleaseTarget` instead of + two optional strings, which keeps the "exactly one is set" invariant in the type system + rather than restated as `assert` at every call site. + """ + if branch is not None and tag is not None: + app.abort('Cannot use --branch and --tag together; pick one.') + if branch is None and tag is None: + app.abort('Exactly one of --branch or --tag must be provided.') + + if branch is not None: + if not re.match(BRANCH_PATTERN, branch): + app.abort(f'Invalid branch: {branch!r}. Must match {BRANCH_PATTERN}.') + return Branch(branch) + + assert tag is not None + normalized = tag.removeprefix('v') + if not re.match(TAG_PATTERN, normalized): + app.abort(f'Invalid tag: {tag!r}. Must match {TAG_PATTERN}.') + return Tag(normalized) + + +def local_ref_for(target: ReleaseTarget) -> str: + """The local refname that `fetch_target` populates and that `git show` can read from.""" + if isinstance(target, Branch): + return f'origin/{target.name}' + return target.name + + +def fetch_target(app: Application, target: ReleaseTarget) -> None: + """Fetch the user's release target from origin so `local_ref_for(target)` resolves locally. + + Combines the existence check (would otherwise be a `git ls-remote` probe) with the side + effect of populating the local refs we need to read the workflow files. After this call, + `git show origin/:` or `git show :` is guaranteed to find the + target in the local clone. + """ + if isinstance(target, Branch): + kind = 'branch' + refspec = f'+refs/heads/{target.name}:refs/remotes/origin/{target.name}' + else: + kind = 'tag' + refspec = f'refs/tags/{target.name}:refs/tags/{target.name}' + + app.display_waiting(f'Fetching {kind} `{target.name}` from origin...') + try: + app.repo.git.run('fetch', '--quiet', '--depth=1', 'origin', refspec) + except OSError as e: + msg = str(e).lower() + if any(fragment in msg for fragment in GIT_FETCH_MISSING_FRAGMENTS): + app.abort(f'{kind.capitalize()} `{target.name}` not found on origin.') + else: + app.abort(f'Failed to fetch {kind} `{target.name}` from origin: {e}') + + +def verify_workflows_present_on_ref(app: Application, target: ReleaseTarget) -> None: + """Confirm both workflow files exist at the target ref (which `fetch_target` made local).""" + ref = local_ref_for(target) + kind = 'branch' if isinstance(target, Branch) else 'tag' + + missing: list[str] = [] + for path in WORKFLOW_FILES: + try: + app.repo.git.show_file(path, ref) + except OSError as e: + msg = str(e).lower() + if any(fragment in msg for fragment in GIT_FILE_MISSING_FRAGMENTS): + missing.append(path) + else: + app.abort(f'Failed to read `{path}` from `{ref}`: {e}') + + if missing: + app.abort( + f'{kind.capitalize()} `{target.name}` is missing required workflow file(s): {", ".join(missing)}. ' + 'Pick a newer ref that includes both `test-agent.yml` and `test-agent-windows.yml`.' + ) diff --git a/ddev/src/ddev/cli/terminal.py b/ddev/src/ddev/cli/terminal.py index 4248034a14076..23e29ce773baf 100644 --- a/ddev/src/ddev/cli/terminal.py +++ b/ddev/src/ddev/cli/terminal.py @@ -4,6 +4,7 @@ from __future__ import annotations import os +from collections.abc import Sequence from functools import cached_property from textwrap import indent as indent_text from time import monotonic, sleep @@ -174,6 +175,41 @@ def style_info(self, text: str) -> Text: def style_debug(self, text: str) -> Text: return Text(text, style=self._style_level_debug) + def labeled_text(self, label: str, value: str | Text = '', *, width: int | None = None) -> Text: + """Return a rich label/value line using the terminal's info style for the label.""" + text = Text() + prefix = f'{label}:' + if width is None: + prefix = f'{prefix} ' + else: + prefix = prefix.ljust(width + 2) + + text.append(prefix, style=self._style_level_info) + if isinstance(value, Text): + text.append_text(value) + else: + text.append(value) + return text + + def labeled_lines( + self, + rows: Sequence[tuple[str, str | Text]], + *, + indent: str = '', + align: bool = True, + ) -> Text: + """Return aligned rich label/value lines.""" + text = Text() + width = max((len(label) for label, _ in rows), default=0) if align else None + + for i, (label, value) in enumerate(rows): + if i: + text.append('\n') + text.append(indent) + text.append_text(self.labeled_text(label, value, width=width)) + + return text + def initialize_styles(self, styles: dict): # no cov # Lazily display errors so that they use the correct style errors = [] diff --git a/ddev/src/ddev/utils/docker_registry.py b/ddev/src/ddev/utils/docker_registry.py new file mode 100644 index 0000000000000..b68b133b414ad --- /dev/null +++ b/ddev/src/ddev/utils/docker_registry.py @@ -0,0 +1,83 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Docker Registry v2 read-only helpers. + +These helpers cover anonymous reads against any Docker Registry v2 endpoint +(`registry.datadoghq.com` is the default and serves the Datadog Agent image). +They are intentionally narrow: manifest existence checks and tag listings with +`Link: rel="next"` pagination per RFC 5988. +""" + +from __future__ import annotations + +import httpx + +DEFAULT_REGISTRY_HOST = 'registry.datadoghq.com' + +# Multi-arch (manifest list) is the canonical media type; OCI index and the +# single-arch fallbacks are accepted because the registry may serve any of +# these depending on which origin (GAR vs S3) responds. +MANIFEST_ACCEPT = ', '.join( + [ + 'application/vnd.docker.distribution.manifest.list.v2+json', + 'application/vnd.oci.image.index.v1+json', + 'application/vnd.docker.distribution.manifest.v2+json', + 'application/vnd.oci.image.manifest.v1+json', + ] +) + +# Large default page size keeps the common case to one round trip. Pagination +# is followed regardless, so the value is a performance hint, not a cap. +DEFAULT_PAGE_SIZE = 10000 + + +def manifest_exists( + repository: str, + tag: str, + *, + host: str = DEFAULT_REGISTRY_HOST, + timeout: float = 10.0, +) -> bool: + """Return True if `/v2//manifests/` resolves, False on 404.""" + response = httpx.head( + f'https://{host}/v2/{repository}/manifests/{tag}', + headers={'Accept': MANIFEST_ACCEPT}, + follow_redirects=True, + timeout=timeout, + ) + if response.status_code == 404: + return False + response.raise_for_status() + return True + + +def list_tags( + repository: str, + *, + host: str = DEFAULT_REGISTRY_HOST, + page_size: int = DEFAULT_PAGE_SIZE, + timeout: float = 10.0, +) -> list[str]: + """Return every tag published for `repository`, following `Link: rel="next"` pagination. + + Pagination is parsed via `httpx.Response.links`, which handles RFC 5988 quoting and + multi-link headers correctly. Relative URLs in the next link are resolved against + `host` (registries commonly return paths like `/v2/...?last=...`). + """ + url: str | None = f'https://{host}/v2/{repository}/tags/list?n={page_size}' + tags: list[str] = [] + while url is not None: + response = httpx.get(url, timeout=timeout) + response.raise_for_status() + try: + payload = response.json() + except ValueError as e: + raise httpx.HTTPError(f'Registry returned non-JSON body for tag list: {e}') from e + page = payload.get('tags') or [] + tags.extend(page) + next_url = response.links.get('next', {}).get('url') + if next_url and next_url.startswith('/'): + next_url = f'https://{host}{next_url}' + url = next_url + return tags diff --git a/ddev/src/ddev/utils/git.py b/ddev/src/ddev/utils/git.py index 2b5a9f6865c9c..ecd4262af09ec 100644 --- a/ddev/src/ddev/utils/git.py +++ b/ddev/src/ddev/utils/git.py @@ -124,13 +124,15 @@ def pull(self, ref): def push(self, ref): return self.capture('push', 'origin', ref) - def tag(self, value, message=None): + def tag(self, value, message=None, ref=None): """ - Create a tag with an optional message. + Create a tag with an optional message, optionally at a specific commit/ref. """ cmd = ['tag', value] if message is not None: - cmd.extend(['--message', value]) + cmd.extend(['--message', message]) + if ref is not None: + cmd.append(ref) return self.capture(*cmd) def tags(self, glob_pattern=None) -> list[str]: diff --git a/ddev/src/ddev/utils/github_async/client.py b/ddev/src/ddev/utils/github_async/client.py index 8cfdc6bfda8eb..9575f726af295 100644 --- a/ddev/src/ddev/utils/github_async/client.py +++ b/ddev/src/ddev/utils/github_async/client.py @@ -12,7 +12,7 @@ from contextlib import asynccontextmanager from dataclasses import dataclass from pathlib import Path -from typing import Any, Literal, Self +from typing import Any, Literal, Self, overload import httpx from pydantic import BaseModel, ConfigDict, Field @@ -25,6 +25,7 @@ PullRequest, PullRequestReviewComment, WorkflowDispatchResult, + WorkflowJobsList, WorkflowRun, ) @@ -161,6 +162,7 @@ def _parse_response[T: BaseModel](response: httpx.Response, model: type[T]) -> G # Endpoint methods # ------------------------------------------------------------------ + @overload async def create_workflow_dispatch( self, owner: str, @@ -169,7 +171,34 @@ async def create_workflow_dispatch( ref: str, inputs: dict[str, str] | None = None, timeout: float | None = None, - ) -> GitHubResponse[WorkflowDispatchResult]: + *, + return_run_details: Literal[True], + ) -> GitHubResponse[WorkflowDispatchResult]: ... + + @overload + async def create_workflow_dispatch( + self, + owner: str, + repo: str, + workflow_id: str | int, + ref: str, + inputs: dict[str, str] | None = None, + timeout: float | None = None, + *, + return_run_details: Literal[False] = False, + ) -> GitHubResponse[None]: ... + + async def create_workflow_dispatch( + self, + owner: str, + repo: str, + workflow_id: str | int, + ref: str, + inputs: dict[str, str] | None = None, + timeout: float | None = None, + *, + return_run_details: bool = False, + ) -> GitHubResponse[WorkflowDispatchResult] | GitHubResponse[None]: """ Calls the GitHub API to trigger a workflow dispatch event. @@ -183,20 +212,29 @@ async def create_workflow_dispatch( ref: Branch or tag name to run the workflow on. inputs: Optional key/value inputs forwarded to the workflow. timeout: Optional timeout for this specific request. Defaults to the client's default_timeout. + return_run_details: When True, requests a 200 response with the new run's metadata + (workflow_run_id, run_url, html_url) instead of the default 204 No Content. + See https://github.blog/changelog/2026-02-19-workflow-dispatch-api-now-returns-run-ids/. Returns: - GitHubResponse[WorkflowDispatchResult]: The dispatched run id and headers. + When ``return_run_details=False`` (default): ``GitHubResponse[None]`` wrapping the 204. + When ``return_run_details=True``: ``GitHubResponse[WorkflowDispatchResult]`` with the new run's + IDs and URLs. """ - body: dict[str, Any] = {"ref": ref, "return_run_details": True} + body: dict[str, Any] = {"ref": ref} if inputs is not None: body["inputs"] = inputs + if return_run_details: + body["return_run_details"] = True response = await self._request( "POST", f"/repos/{owner}/{repo}/actions/workflows/{workflow_id}/dispatches", timeout=timeout, json=body, ) - return self._parse_response(response, WorkflowDispatchResult) + if return_run_details: + return self._parse_response(response, WorkflowDispatchResult) + return GitHubResponse[None].model_validate({"data": None, "headers": dict(response.headers)}) async def get_workflow_run( self, @@ -251,6 +289,34 @@ async def list_workflow_run_artifacts( async for response in self._paginated_request("GET", endpoint, timeout=timeout, params={"per_page": per_page}): yield self._parse_response(response, ArtifactsList) + async def list_workflow_jobs( + self, + owner: str, + repo: str, + run_id: int, + per_page: int = 30, + timeout: float | None = None, + ) -> AsyncIterator[GitHubResponse[WorkflowJobsList]]: + """ + Calls the GitHub API to list jobs for a workflow run (paginated). + + GitHub API Documentation: + https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run + + Args: + owner: Repository owner (user or organisation). + repo: Repository name. + run_id: Numeric ID of the workflow run. + per_page: Number of jobs per page (default 30, max 100). + timeout: Optional timeout for this specific request. Defaults to the client's default_timeout. + + Returns: + AsyncIterator[GitHubResponse[WorkflowJobsList]]: One page of jobs per iteration. + """ + endpoint = f"/repos/{owner}/{repo}/actions/runs/{run_id}/jobs" + async for response in self._paginated_request("GET", endpoint, timeout=timeout, params={"per_page": per_page}): + yield self._parse_response(response, WorkflowJobsList) + async def create_issue_comment( self, owner: str, diff --git a/ddev/src/ddev/utils/github_async/models/__init__.py b/ddev/src/ddev/utils/github_async/models/__init__.py index 6f498789eb153..719050c723ecd 100644 --- a/ddev/src/ddev/utils/github_async/models/__init__.py +++ b/ddev/src/ddev/utils/github_async/models/__init__.py @@ -34,7 +34,10 @@ from .user import GitHubUser as GitHubUser from .workflow import Artifact as Artifact from .workflow import ArtifactsList as ArtifactsList + from .workflow import JobStep as JobStep from .workflow import WorkflowDispatchResult as WorkflowDispatchResult + from .workflow import WorkflowJob as WorkflowJob + from .workflow import WorkflowJobsList as WorkflowJobsList from .workflow import WorkflowRun as WorkflowRun # Map of exported attribute name -> submodule (relative to this package) that @@ -45,11 +48,14 @@ 'CheckRun': 'check_run', 'GitHubUser': 'user', 'IssueComment': 'comment', + 'JobStep': 'workflow', 'Label': 'label', 'PullRequest': 'pull_request', 'PullRequestRef': 'pull_request', 'PullRequestReviewComment': 'comment', 'WorkflowDispatchResult': 'workflow', + 'WorkflowJob': 'workflow', + 'WorkflowJobsList': 'workflow', 'WorkflowRun': 'workflow', } diff --git a/ddev/src/ddev/utils/github_async/models/workflow.py b/ddev/src/ddev/utils/github_async/models/workflow.py index 41662a698803e..49af424bf6dbf 100644 --- a/ddev/src/ddev/utils/github_async/models/workflow.py +++ b/ddev/src/ddev/utils/github_async/models/workflow.py @@ -5,7 +5,7 @@ from __future__ import annotations -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field class WorkflowRun(BaseModel): @@ -23,11 +23,13 @@ class WorkflowRun(BaseModel): class WorkflowDispatchResult(BaseModel): - """Response payload from a successful workflow dispatch.""" + """Run metadata returned by `POST /actions/workflows/{id}/dispatches` when `return_run_details=True`.""" model_config = ConfigDict(extra="ignore") workflow_run_id: int + run_url: str + html_url: str class Artifact(BaseModel): @@ -50,3 +52,37 @@ class ArtifactsList(BaseModel): total_count: int artifacts: list[Artifact] + + +class JobStep(BaseModel): + """A single step within a GitHub Actions job.""" + + model_config = ConfigDict(extra="ignore") + + name: str + status: str + conclusion: str | None = None + number: int | None = None + + +class WorkflowJob(BaseModel): + """A single job within a GitHub Actions workflow run.""" + + model_config = ConfigDict(extra="ignore") + + id: int + run_id: int + name: str + status: str + conclusion: str | None = None + html_url: str | None = None + steps: list[JobStep] = Field(default_factory=list) + + +class WorkflowJobsList(BaseModel): + """A list of jobs with a total count.""" + + model_config = ConfigDict(extra="ignore") + + total_count: int + jobs: list[WorkflowJob] diff --git a/ddev/tests/cli/ci/tests/test_messages.py b/ddev/tests/cli/ci/tests/test_messages.py new file mode 100644 index 0000000000000..db8afba2180e1 --- /dev/null +++ b/ddev/tests/cli/ci/tests/test_messages.py @@ -0,0 +1,96 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Tests for the ci/tests pipeline messages.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from ddev.cli.ci.tests.messages import ARTIFACT_NAME_DISALLOWED, BatchJob, BatchJobResult +from ddev.utils.github_async.models import WorkflowJob + + +def batch_job( + name="job-1", + target="ntp", + runner="ubuntu-latest", + environment="py3.13", + platform="linux", + unit_tests=True, + e2e_tests=False, +) -> BatchJob: + return BatchJob( + name=name, + target=target, + runner=runner, + environment=environment, + platform=platform, + unit_tests=unit_tests, + e2e_tests=e2e_tests, + ) + + +def test_artifact_name_built_from_target_env_platform() -> None: + assert batch_job().artifact_name() == "ntp_py3.13_linux" + + +@pytest.mark.parametrize("field", ["name", "runner", "unit_tests", "e2e_tests"]) +def test_artifact_name_ignores_non_identifying_fields(field: str) -> None: + # name / runner / unit_tests / e2e_tests are not part of the artifact name. + changed = {"name": "other-job", "runner": "windows-latest", "unit_tests": False, "e2e_tests": True}[field] + assert batch_job(**{field: changed}).artifact_name() == batch_job().artifact_name() + + +@pytest.mark.parametrize( + ("field", "value"), + [("target", "kafka"), ("environment", "py3.12"), ("platform", "windows")], +) +def test_artifact_name_varies_with_identifying_fields(field: str, value: str) -> None: + assert batch_job(**{field: value}).artifact_name() != batch_job().artifact_name() + + +def test_artifact_name_sanitizes_disallowed_characters() -> None: + name = batch_job(target='a/b:c*d?e|f"gi\\j', environment="x\r\ny").artifact_name() + assert ARTIFACT_NAME_DISALLOWED.search(name) is None + + +def test_correlate_matches_jobs_and_artifacts(tmp_path: Path) -> None: + job = batch_job("j1") + base = job.artifact_name() + artifact_dir = tmp_path / base + artifact_dir.mkdir() + workflow_job = WorkflowJob(id=1, run_id=123, name="j1", status="completed", conclusion="success") + + [result] = BatchJobResult.correlate([job], [workflow_job], {base: artifact_dir}) + + assert result.job == job + assert result.workflow_job is workflow_job + assert result.artifact_name_path == str(artifact_dir) + assert result.unit_artifact_name == f"unit-{base}" + assert result.e2e_artifact_name == f"e2e-{base}" + assert result.coverage_artifact_name == f"coverage-{base}" + + +def test_correlate_without_workflow_or_artifact_match() -> None: + # A job absent from the workflow API and with no matching artifact folder still yields a + # well-formed result whose correlated facets are None. + job = batch_job("j1") + + [result] = BatchJobResult.correlate([job], [], {}) + + assert result.job == job + assert result.workflow_job is None + assert result.artifact_name_path is None + + +def test_correlate_ignores_artifact_dir_missing_on_disk(tmp_path: Path) -> None: + # A mapped path that does not exist on disk is not recorded. + job = batch_job("j1") + base = job.artifact_name() + + [result] = BatchJobResult.correlate([job], [], {base: tmp_path / base}) + + assert result.artifact_name_path is None diff --git a/ddev/tests/cli/ci/tests/test_task_test_runner.py b/ddev/tests/cli/ci/tests/test_task_test_runner.py index 603b6839049fd..7ef13d5a8d2fd 100644 --- a/ddev/tests/cli/ci/tests/test_task_test_runner.py +++ b/ddev/tests/cli/ci/tests/test_task_test_runner.py @@ -19,6 +19,8 @@ from ddev.utils.github_async.models import ( Artifact, ArtifactsList, + WorkflowJob, + WorkflowJobsList, WorkflowRun, ) from tests.helpers.github_async import FakeAsyncGitHubClient @@ -28,27 +30,27 @@ # --------------------------------------------------------------------------- -def _wrap(data: Any) -> GitHubResponse[Any]: +def wrap(data: Any) -> GitHubResponse[Any]: return GitHubResponse(data=data, headers={}) -def _job(name: str = "job-1") -> BatchJob: +def make_job(name: str = "job-1", environment: str = "py3.13") -> BatchJob: return BatchJob( name=name, target="ntp", runner="ubuntu-latest", - environment="py3.13", + environment=environment, platform="linux", unit_tests=True, e2e_tests=False, ) -_DEFAULT_URL = object() +DEFAULT_URL = object() -def _artifact(idx: int, expired: bool = False, archive_download_url: Any = _DEFAULT_URL) -> Artifact: - url = f"https://api.github.com/artifact/{idx}/zip" if archive_download_url is _DEFAULT_URL else archive_download_url +def make_artifact(idx: int, expired: bool = False, archive_download_url: Any = DEFAULT_URL) -> Artifact: + url = f"https://api.github.com/artifact/{idx}/zip" if archive_download_url is DEFAULT_URL else archive_download_url return Artifact( id=idx, name=f"artifact-{idx}", @@ -59,7 +61,7 @@ def _artifact(idx: int, expired: bool = False, archive_download_url: Any = _DEFA ) -def _workflow_run(status: str = "completed", conclusion: str | None = "success") -> WorkflowRun: +def make_workflow_run(status: str = "completed", conclusion: str | None = "success") -> WorkflowRun: return WorkflowRun( id=123, name="test-batch", @@ -69,15 +71,29 @@ def _workflow_run(status: str = "completed", conclusion: str | None = "success") ) -def _artifacts_page(artifacts: list[Artifact]) -> GitHubResponse[ArtifactsList]: - return _wrap(ArtifactsList(total_count=len(artifacts), artifacts=list(artifacts))) +def artifacts_page(artifacts: list[Artifact]) -> GitHubResponse[ArtifactsList]: + return wrap(ArtifactsList(total_count=len(artifacts), artifacts=list(artifacts))) -def _mock_artifacts(fake: FakeAsyncGitHubClient, artifacts: list[Artifact]) -> None: - fake.mock_response("list_workflow_run_artifacts", _artifacts_page(artifacts)) +def mock_artifacts(fake: FakeAsyncGitHubClient, artifacts: list[Artifact]) -> None: + fake.mock_response("list_workflow_run_artifacts", artifacts_page(artifacts)) -def _make_runner(client: FakeAsyncGitHubClient, tmp_path: Path) -> TaskTestRunner: +def make_artifact_for(idx: int, job: BatchJob) -> Artifact: + """Artifact whose name matches a job's deterministic artifact name (the upload/download contract).""" + artifact = make_artifact(idx) + return artifact.model_copy(update={"name": job.artifact_name()}) + + +def make_workflow_job(name: str, conclusion: str = "success") -> WorkflowJob: + return WorkflowJob(id=1, run_id=123, name=name, status="completed", conclusion=conclusion) + + +def mock_jobs(fake: FakeAsyncGitHubClient, jobs: list[WorkflowJob]) -> None: + fake.mock_response("list_workflow_jobs", wrap(WorkflowJobsList(total_count=len(jobs), jobs=list(jobs)))) + + +def make_runner(client: FakeAsyncGitHubClient, tmp_path: Path) -> TaskTestRunner: options = TestRunnerOptions( owner="DataDog", repo="integrations-core", @@ -97,15 +113,38 @@ def _make_runner(client: FakeAsyncGitHubClient, tmp_path: Path) -> TaskTestRunne return runner -def _drain_queue(queue: asyncio.Queue[BaseMessage]) -> list[BaseMessage]: +def drain_queue(queue: asyncio.Queue[BaseMessage]) -> list[BaseMessage]: out: list[BaseMessage] = [] while not queue.empty(): out.append(queue.get_nowait()) return out -def _batch(batch_id: str = "batch-err") -> TestBatch: - return TestBatch(id=batch_id, job_list=[_job()], jobs_count=1, integrations=["ntp"]) +def make_batch(batch_id: str = "batch-err") -> TestBatch: + return TestBatch(id=batch_id, job_list=[make_job()], jobs_count=1, integrations=["ntp"]) + + +async def run_happy_path(tmp_path: Path) -> tuple[FakeAsyncGitHubClient, BatchFinished]: + """Run a clean two-job batch through the runner once and return the client and the BatchFinished. + + The two jobs share a target/environment/platform, so their artifact names collide with each other + and never match the generic ``artifact-N`` uploads: correlation therefore finds no match. + """ + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1), make_artifact(2)]) + runner = make_runner(fake, tmp_path) + + batch = TestBatch( + id="batch-1", job_list=[make_job("j1"), make_job("j2")], jobs_count=2, integrations=["ntp", "kafka"] + ) + await runner.process_message(batch) + + submitted = drain_queue(runner.queue) + assert len(submitted) == 1 + finished = submitted[0] + assert isinstance(finished, BatchFinished) + return fake, finished # --------------------------------------------------------------------------- @@ -131,21 +170,14 @@ def test_conclusion_to_status(conclusion: str | None, expected: str) -> None: # --------------------------------------------------------------------------- -# process_message +# process_message — happy path (one concern per test) # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_process_message_happy_path(tmp_path: Path) -> None: - fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1), _artifact(2)]) - runner = _make_runner(fake, tmp_path) +async def test_dispatches_workflow_with_job_list_payload(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) - batch = TestBatch(id="batch-1", job_list=[_job("j1"), _job("j2")], jobs_count=2, integrations=["ntp", "kafka"]) - await runner.process_message(batch) - - # Dispatch once, with the right shape. dispatch_calls = fake.calls_to("create_workflow_dispatch") assert len(dispatch_calls) == 1 assert dispatch_calls[0].kwargs == { @@ -154,6 +186,7 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: "workflow_id": "test-batch.yaml", "ref": "master", "timeout": None, + "return_run_details": True, "inputs": { "batch_id": "batch-1", "checkout_sha": "merge-sha-bbb", @@ -168,6 +201,7 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: "platform": "linux", "unit_tests": True, "e2e_tests": False, + "artifact_name": "ntp_py3.13_linux", }, { "name": "j2", @@ -177,13 +211,18 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: "platform": "linux", "unit_tests": True, "e2e_tests": False, + "artifact_name": "ntp_py3.13_linux", }, ] ), }, } - # Check run opened with the PR head SHA, in_progress, with the workflow URL as details_url. + +@pytest.mark.asyncio +async def test_opens_check_run_with_head_sha_and_details_url(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) + create_calls = fake.calls_to("create_check_run") assert len(create_calls) == 1 cr = create_calls[0].kwargs @@ -192,30 +231,56 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: assert cr["name"] == "test-batch/batch-1" assert cr["details_url"] == "https://github.com/o/r/actions/runs/123" - # Both artifacts downloaded under //- (collision-safe path). + +@pytest.mark.asyncio +async def test_downloads_all_batch_artifacts(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) + download_calls = fake.calls_to("download_artifact") assert len(download_calls) == 2 assert (download_calls[0].kwargs["archive_download_url"], download_calls[0].kwargs["dest_path"]) == ( "https://api.github.com/artifact/1/zip", - tmp_path / "123" / "1-artifact-1", + tmp_path / "artifact-1", ) assert (download_calls[1].kwargs["archive_download_url"], download_calls[1].kwargs["dest_path"]) == ( "https://api.github.com/artifact/2/zip", - tmp_path / "123" / "2-artifact-2", + tmp_path / "artifact-2", ) - # Exactly one BatchFinished message submitted with the right fields. - submitted = _drain_queue(runner.queue) - assert len(submitted) == 1 - finished = submitted[0] - assert isinstance(finished, BatchFinished) + +@pytest.mark.asyncio +async def test_emits_batch_finished_with_run_metadata(tmp_path: Path) -> None: + _, finished = await run_happy_path(tmp_path) + assert finished.id == "batch-1" assert finished.status == "success" assert finished.run_id == 123 assert finished.workflow_url == "https://github.com/o/r/actions/runs/123" - assert finished.artifacts_path == str(tmp_path / "123") + assert finished.artifacts_path == str(tmp_path) + + +@pytest.mark.asyncio +async def test_batch_finished_records_unmatched_correlation_when_no_match(tmp_path: Path) -> None: + # The two jobs' artifact names collide and don't match the generic artifacts, and there is no + # jobs API match, so both correlated facets are None while the per-facet file names are recorded. + _, finished = await run_happy_path(tmp_path) + + assert [r.job.name for r in finished.batch_jobs] == ["j1", "j2"] + assert all(r.workflow_job is None and r.artifact_name_path is None for r in finished.batch_jobs) + + first = finished.batch_jobs[0] + base = make_job("j1").artifact_name() + assert (first.unit_artifact_name, first.e2e_artifact_name, first.coverage_artifact_name) == ( + f"unit-{base}", + f"e2e-{base}", + f"coverage-{base}", + ) + + +@pytest.mark.asyncio +async def test_closes_check_run_with_workflow_conclusion(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) - # Check run closed with the GitHub conclusion. update_calls = fake.calls_to("update_check_run") assert len(update_calls) == 1 upd = update_calls[0].kwargs @@ -224,16 +289,97 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: assert upd["conclusion"] == "success" +# --------------------------------------------------------------------------- +# process_message — correlation +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_process_message_correlates_batch_jobs(tmp_path: Path) -> None: + # A failed multi-job run where j1 passed and j2 failed: each batch_jobs entry must carry its + # own true per-job status and its artifact directory, resolved by the job's artifact name. + # The two jobs differ in an artifact-relevant field (environment) so their base names differ. + j1, j2 = make_job("j1", environment="py3.13"), make_job("j2", environment="py3.12") + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "failure")) + mock_artifacts(fake, [make_artifact_for(1, j1), make_artifact_for(2, j2)]) + mock_jobs(fake, [make_workflow_job("j1", "success"), make_workflow_job("j2", "failure")]) + runner = make_runner(fake, tmp_path) + + await runner.process_message(TestBatch(id="batch-c", job_list=[j1, j2], jobs_count=2, integrations=["ntp"])) + + finished = drain_queue(runner.queue)[0] + assert isinstance(finished, BatchFinished) + assert finished.status == "failure" + + results = {r.job.name: r for r in finished.batch_jobs} + assert set(results) == {"j1", "j2"} + # Passing job is not marked failed; each carries its true workflow-run conclusion. + assert results["j1"].workflow_job is not None and results["j1"].workflow_job.conclusion == "success" + assert results["j2"].workflow_job is not None and results["j2"].workflow_job.conclusion == "failure" + # Each job's single artifact folder is resolved by its base artifact name (no heuristic matching). + assert results["j1"].artifact_name_path == str(tmp_path / j1.artifact_name()) + assert results["j2"].artifact_name_path == str(tmp_path / j2.artifact_name()) + # The per-facet file names inside each folder are recorded from the base artifact name. + assert results["j1"].unit_artifact_name == f"unit-{j1.artifact_name()}" + assert results["j2"].coverage_artifact_name == f"coverage-{j2.artifact_name()}" + + +@pytest.mark.asyncio +async def test_process_message_batch_job_without_workflow_match(tmp_path: Path) -> None: + # A job present in the batch but absent from the workflow-run API response still yields a + # well-formed entry: its artifact is located but workflow_job is None. + job = make_job("j1") + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact_for(1, job)]) + # list_workflow_jobs defaults to an empty page. + runner = make_runner(fake, tmp_path) + + await runner.process_message(TestBatch(id="batch-d", job_list=[job], jobs_count=1, integrations=["ntp"])) + + finished = drain_queue(runner.queue)[0] + assert isinstance(finished, BatchFinished) + [result] = finished.batch_jobs + assert result.job == job + assert result.workflow_job is None + assert result.artifact_name_path == str(tmp_path / job.artifact_name()) + + +@pytest.mark.asyncio +async def test_process_message_batch_job_without_artifacts(tmp_path: Path) -> None: + # A job with no artifacts on disk still yields a well-formed entry with artifact_name_path None. + job = make_job("j1") + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, []) + mock_jobs(fake, [make_workflow_job("j1", "success")]) + runner = make_runner(fake, tmp_path) + + await runner.process_message(TestBatch(id="batch-e", job_list=[job], jobs_count=1, integrations=["ntp"])) + + finished = drain_queue(runner.queue)[0] + assert isinstance(finished, BatchFinished) + [result] = finished.batch_jobs + assert result.workflow_job is not None and result.workflow_job.conclusion == "success" + assert result.artifact_name_path is None + + +# --------------------------------------------------------------------------- +# process_message — conclusions and resilience +# --------------------------------------------------------------------------- + + @pytest.mark.asyncio async def test_process_message_failure_path(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "failure")) - _mock_artifacts(fake, [_artifact(1)]) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "failure")) + mock_artifacts(fake, [make_artifact(1)]) + runner = make_runner(fake, tmp_path) - await runner.process_message(TestBatch(id="batch-2", job_list=[_job()], jobs_count=1, integrations=["ntp"])) + await runner.process_message(TestBatch(id="batch-2", job_list=[make_job()], jobs_count=1, integrations=["ntp"])) - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -248,14 +394,14 @@ async def test_process_message_failure_path(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_skipped_conclusion(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "skipped")) - _mock_artifacts(fake, []) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "skipped")) + mock_artifacts(fake, []) + runner = make_runner(fake, tmp_path) - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # A "skipped" GitHub conclusion maps to a "skipped" BatchFinished and a "skipped" check run. - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -271,14 +417,14 @@ async def test_process_message_polls_until_completed(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() # Initial get + polls until "completed"; FIFO one-shots replay in order. for status in ("queued", "in_progress", "in_progress", "completed"): - fake.mock_response("get_workflow_run", _workflow_run(status, "success"), once=True) - _mock_artifacts(fake, []) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run(status, "success"), once=True) + mock_artifacts(fake, []) + runner = make_runner(fake, tmp_path) - await runner.process_message(TestBatch(id="batch-3", job_list=[_job()], jobs_count=1, integrations=["ntp"])) + await runner.process_message(TestBatch(id="batch-3", job_list=[make_job()], jobs_count=1, integrations=["ntp"])) assert len(fake.calls_to("get_workflow_run")) == 4 - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 assert isinstance(submitted[0], BatchFinished) assert submitted[0].status == "success" @@ -287,18 +433,18 @@ async def test_process_message_polls_until_completed(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_skips_expired_artifacts(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts( + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts( fake, [ - _artifact(1), - _artifact(2, expired=True), - _artifact(3, archive_download_url=None), + make_artifact(1), + make_artifact(2, expired=True), + make_artifact(3, archive_download_url=None), ], ) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) - await runner.process_message(TestBatch(id="batch-4", job_list=[_job()], jobs_count=1, integrations=["ntp"])) + await runner.process_message(TestBatch(id="batch-4", job_list=[make_job()], jobs_count=1, integrations=["ntp"])) # Only the non-expired artifact with a download URL should be fetched. download_calls = fake.calls_to("download_artifact") @@ -309,14 +455,14 @@ async def test_process_message_skips_expired_artifacts(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_null_conclusion(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", None)) - _mock_artifacts(fake, []) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", None)) + mock_artifacts(fake, []) + runner = make_runner(fake, tmp_path) - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # A null GitHub conclusion maps to a "failure" BatchFinished and a "neutral" check run. - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -330,19 +476,19 @@ async def test_process_message_null_conclusion(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_emits_batch_finished_when_listing_artifacts_fails(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) fake.mock_response("list_workflow_run_artifacts", RuntimeError("boom-list-artifacts")) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) # A failure listing artifacts must not abort the batch: the check run is still closed # and exactly one BatchFinished is emitted with the workflow's real conclusion. - await runner.process_message(_batch()) + await runner.process_message(make_batch()) update_calls = fake.calls_to("update_check_run") assert len(update_calls) == 1 assert update_calls[0].kwargs["conclusion"] == "success" - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -352,16 +498,16 @@ async def test_process_message_emits_batch_finished_when_listing_artifacts_fails @pytest.mark.asyncio async def test_process_message_swallows_check_run_close_failure(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1)]) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1)]) fake.mock_response("update_check_run", RuntimeError("boom-close")) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) # A failure closing the check run must not propagate or suppress the BatchFinished. - await runner.process_message(_batch()) + await runner.process_message(make_batch()) assert len(fake.calls_to("update_check_run")) == 1 - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 assert isinstance(submitted[0], BatchFinished) assert submitted[0].status == "success" @@ -370,16 +516,16 @@ async def test_process_message_swallows_check_run_close_failure(tmp_path: Path) @pytest.mark.asyncio async def test_download_failure_for_one_artifact_does_not_abort_others(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1), _artifact(2), _artifact(3)]) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1), make_artifact(2), make_artifact(3)]) fake.mock_response( "download_artifact", RuntimeError("download failure for artifact 2"), archive_download_url="https://api.github.com/artifact/2/zip", ) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # All three were attempted; the failure for #2 didn't abort #3. urls = [call.kwargs["archive_download_url"] for call in fake.calls_to("download_artifact")] @@ -388,7 +534,7 @@ async def test_download_failure_for_one_artifact_does_not_abort_others(tmp_path: "https://api.github.com/artifact/2/zip", "https://api.github.com/artifact/3/zip", ] - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 assert isinstance(submitted[0], BatchFinished) assert submitted[0].status == "success" @@ -409,10 +555,10 @@ async def test_failure_before_check_run_opens_does_not_create_check_run(tmp_path fake.mock_response("create_workflow_dispatch", boom) else: fake.mock_response("get_workflow_run", boom) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) with pytest.raises(RuntimeError, match=f"boom-{failure_point}"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # The check run is never opened, so there is nothing to close. fake.assert_not_called("create_check_run") @@ -424,12 +570,12 @@ async def test_failure_mid_poll_closes_check_run_as_cancelled(tmp_path: Path) -> boom = RuntimeError("boom-mid-poll") fake = FakeAsyncGitHubClient() # Initial get succeeds (still running), the first poll raises. - fake.mock_response("get_workflow_run", _workflow_run("queued"), once=True) + fake.mock_response("get_workflow_run", make_workflow_run("queued"), once=True) fake.mock_response("get_workflow_run", boom, once=True) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) with pytest.raises(RuntimeError, match="boom-mid-poll"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # The check run was opened and the finally closed it as cancelled. assert len(fake.calls_to("create_check_run")) == 1 @@ -443,12 +589,12 @@ async def test_failure_mid_poll_closes_check_run_as_cancelled(tmp_path: Path) -> async def test_failure_at_create_check_run_does_not_close_check_run(tmp_path: Path) -> None: boom = RuntimeError("boom-create-check-run") fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) fake.mock_response("create_check_run", boom) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) with pytest.raises(RuntimeError, match="boom-create-check-run"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # The open was attempted but failed before the try/finally, so there is no check run to close. assert len(fake.calls_to("create_check_run")) == 1 @@ -459,9 +605,9 @@ async def test_failure_at_create_check_run_does_not_close_check_run(tmp_path: Pa async def test_failure_at_submit_message_closes_check_run_as_success(tmp_path: Path) -> None: boom = RuntimeError("boom-submit-message") fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1)]) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1)]) + runner = make_runner(fake, tmp_path) class _BoomQueue: def put_nowait(self, _: Any) -> None: @@ -470,7 +616,7 @@ def put_nowait(self, _: Any) -> None: runner.queue = _BoomQueue() # type: ignore[assignment] with pytest.raises(RuntimeError, match="boom-submit-message"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # Workflow completed cleanly; the finally closed the check run as success before submit raised. assert len(fake.calls_to("create_check_run")) == 1 diff --git a/ddev/tests/cli/release/branch/test_tag.py b/ddev/tests/cli/release/branch/test_tag.py index da8da18601b1e..512e350444055 100644 --- a/ddev/tests/cli/release/branch/test_tag.py +++ b/ddev/tests/cli/release/branch/test_tag.py @@ -8,6 +8,8 @@ from ddev.utils.git import GitRepository +ORIGIN_REF = 'origin/7.56.x' + def test_tag_check_open_prs_warns_and_allows_continue(ddev, git, mocker, config_file): config_file.model.github = {'user': 'test-user', 'token': 'test-token'} @@ -22,11 +24,11 @@ def test_tag_check_open_prs_warns_and_allows_continue(ddev, git, mocker, config_ return_value=[mock_pr], ) - result = ddev('release', 'branch', 'tag', '--final', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') assert result.exit_code == 0, result.output assert git.method_calls[-2:] == [ - c.tag('7.56.0', message='7.56.0'), + c.tag('7.56.0', message='7.56.0', ref=ORIGIN_REF), c.push('7.56.0'), ] assert 'Found 1 open PR(s) targeting base branch 7.56.x' in result.output @@ -41,7 +43,7 @@ def test_tag_skip_open_pr_check(ddev, git, mocker, config_file): list_prs = mocker.patch('ddev.utils.github.GitHubManager.list_open_pull_requests_targeting_base') - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='y\n') _assert_tag_pushed(git, result, '7.56.0') list_prs.assert_not_called() @@ -56,7 +58,7 @@ def test_tag_github_api_error_degrades_gracefully(ddev, git, mocker, config_file side_effect=Exception('API error'), ) - result = ddev('release', 'branch', 'tag', '--final', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') _assert_tag_pushed(git, result, '7.56.0') assert 'unable to check for open PRs' in result.output @@ -64,12 +66,14 @@ def test_tag_github_api_error_degrades_gracefully(ddev, git, mocker, config_file NO_CONFIRMATION_SO_ABORT = 'Did not get confirmation, aborting. Did not create or push the tag.' RC_NUMBER_PROMPT = 'What RC number are we tagging? (hit ENTER to accept suggestion) [{}]' +TAG_THIS_RELEASE_PROMPT = 'You are on release branch `7.56.x`. Tag this release?' +LS_REMOTE_OK = 'abc123\trefs/heads/7.56.x\n' EXAMPLE_TAGS = [ '7.56.0-rc.1', # Random RC tag from DBM. We should make sure we ignore it. '7.56.0-rc.1-dbm-agent-jobs', - # Icluding RC 11 is interesting because it makes sure we we parse the versions before we sort them. + # Including RC 11 is interesting because it makes sure we parse the versions before we sort them. # The naive sort will think RC 11 is earlier than RC 2. '7.56.0-rc.11', '7.56.0-rc.2', @@ -81,12 +85,28 @@ def test_tag_github_api_error_degrades_gracefully(ddev, git, mocker, config_file ] +def _capture_dispatch(*args): + """Dispatch `git.capture(...)` mock calls by their first argument. + + Tests can override individual subcommands by replacing `git.capture.side_effect` outright. + Defaulting to a per-subcommand mapping (instead of one global return_value) keeps unrelated + tests from silently masking new code paths. + """ + if not args: + return '' + sub = args[0] + if sub == 'ls-remote': + return LS_REMOTE_OK + return '' + + @pytest.fixture def basic_git(mocker): mock_git = mocker.create_autospec(GitRepository) # We're patching the creation of the GitRepository class. # That's why we need a function that returns the mock. mocker.patch('ddev.repo.core.GitRepository', lambda _: mock_git) + mock_git.capture.side_effect = _capture_dispatch return mock_git @@ -95,27 +115,22 @@ def git(basic_git, mocker): mocker.patch('ddev.cli.release.branch.tag._build_agent_yaml_points_to_main', return_value=False) basic_git.current_branch.return_value = '7.56.x' basic_git.tags.return_value = EXAMPLE_TAGS[:] - yield basic_git - assert basic_git.method_calls[:4] == [ - c.current_branch(), - c.pull('7.56.x'), - c.fetch_tags(), - c.tags(glob_pattern='7.56.*'), - ] + return basic_git -def _assert_tag_pushed(git, result, tag): +def _assert_tag_pushed(git, result, tag, ref=ORIGIN_REF): assert result.exit_code == 0, result.output assert git.method_calls[-2:] == [ - c.tag(tag, message=tag), + c.tag(tag, message=tag, ref=ref), c.push(tag), ] - assert f'Create and push this tag: {tag}?' in result.output + expected_prompt = f'Create and push this tag: {tag}?' + assert expected_prompt in result.output -def test_wrong_branch(ddev, basic_git): +def test_wrong_branch_no_release_aborts(ddev, basic_git): """ - Given a branch that doesn't match the release branch pattern we should abort. + With no --release and not on a release branch, the command aborts and asks for --release. """ name = 'foo' basic_git.current_branch.return_value = name @@ -123,15 +138,65 @@ def test_wrong_branch(ddev, basic_git): result = ddev('release', 'branch', 'tag') assert result.exit_code == 1, result.output - assert rf'Invalid branch name: {name}. Branch name must match the pattern ^\d+\.\d+\.x$' in result.output - assert basic_git.method_calls == [c.current_branch()] + assert 'is not a release branch' in result.output + assert '--release' in result.output + + +def test_invalid_release_value_aborts(ddev, git): + result = ddev('release', 'branch', 'tag', '--release', 'not-a-release') + assert result.exit_code != 0, result.output + assert 'Invalid `--release` value' in result.output + + +@pytest.mark.parametrize( + 'release_input', + [ + pytest.param('7.56', id='major.minor'), + pytest.param('7.56.x', id='major.minor.x'), + ], +) +def test_release_input_normalized(ddev, git, release_input): + """ + `--release` accepts both `7.56` and `7.56.x` and normalizes to `7.56.x`. + """ + result = ddev('release', 'branch', 'tag', '--release', release_input, '--final', input='y\n') + + _assert_tag_pushed(git, result, '7.56.0') + + +def test_release_branch_not_on_origin_aborts(ddev, git): + """ + If the release branch is missing from origin, the command aborts. + """ + git.capture.side_effect = lambda *args: '' + result = ddev('release', 'branch', 'tag', '--release', '7.99.x', '--final', input='y\n') + + assert result.exit_code == 1, result.output + assert 'does not exist on `origin`' in result.output + + +def test_confirm_release_branch_when_no_release_arg(ddev, git): + """ + With no --release and already on a release branch, we ask to confirm before tagging. + """ + result = ddev('release', 'branch', 'tag', '--final', input='y\ny\n') + + _assert_tag_pushed(git, result, '7.56.0') + assert TAG_THIS_RELEASE_PROMPT in result.output + + +def test_decline_release_branch_when_no_release_arg(ddev, git): + result = ddev('release', 'branch', 'tag', '--final', input='n\n') + assert result.exit_code == 1, result.output + assert TAG_THIS_RELEASE_PROMPT in result.output + assert NO_CONFIRMATION_SO_ABORT in result.output def test_middle_of_release_next_rc(ddev, git): """ We're in the middle of a release, some RCs are already done. We want to create the next RC. """ - result = ddev('release', 'branch', 'tag', input='\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input='\ny\n') _assert_tag_pushed(git, result, '7.56.0-rc.12') assert RC_NUMBER_PROMPT.format('12') in result.output @@ -149,16 +214,10 @@ def test_middle_of_release_next_rc(ddev, git): @pytest.mark.parametrize('last_rc', [11, 12]) def test_do_not_confirm_non_sequential_rc(ddev, git, rc_num, no_confirm, last_rc): """ - We're in the middle of a release, some RCs are already done. User wants to create the next RC. - - However the user asks to create an RC that's less than the latest RC number. - This is unusual, so we give a warning and ask user to confirm. If they don't, we abort. - - Important: we are not overwriting an existing RC tag. + Reject the warning when going backwards in RC numbers. """ - git.tags.return_value.append(f'7.56.0-rc.{last_rc}') - result = ddev('release', 'branch', 'tag', input=f'{rc_num}\n{no_confirm}\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num}\n{no_confirm}\n') assert RC_NUMBER_PROMPT.format(str(last_rc + 1)) in result.output assert ( @@ -172,15 +231,7 @@ def test_do_not_confirm_non_sequential_rc(ddev, git, rc_num, no_confirm, last_rc @pytest.mark.parametrize('rc_num', ['3', '10']) def test_confirm_non_sequential_rc(ddev, git, rc_num): - """ - We're in the middle of a release, some RCs are already done. User wants to create the next RC. - - However the user asks to create an RC that's less than the latest RC number. - This is unusual, so we give a warning and ask user to confirm. If they do, we create the tag. - - Important: we are not overwriting an existing RC tag. - """ - result = ddev('release', 'branch', 'tag', input=f'{rc_num}\ny\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num}\ny\ny\n') assert RC_NUMBER_PROMPT.format('12') in result.output assert ( @@ -194,13 +245,9 @@ def test_confirm_non_sequential_rc(ddev, git, rc_num): @pytest.mark.parametrize('rc_num', ['1', '5', '11']) def test_abort_if_rc_tag_exists(ddev, git, rc_num): """ - We're in the middle of a release, some RCs are already done. User wants to create the next RC. - - However the user asks to create an RC for which we already have a tag. This requires special git flags to clobber - the local and remote tags. To keep our logic simple we give up here and leave a hint how the user can proceed. + Refuse to overwrite an existing RC tag. """ - - result = ddev('release', 'branch', 'tag', input=f'{rc_num}\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num}\ny\n') assert result.exit_code == 1, result.output assert RC_NUMBER_PROMPT.format('12') in result.output @@ -208,10 +255,7 @@ def test_abort_if_rc_tag_exists(ddev, git, rc_num): def test_abort_if_tag_less_than_one(ddev, git): - """ - RC numbers less than 1 don't make any sense, so we abort if we get one. - """ - result = ddev('release', 'branch', 'tag', input='0\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input='0\ny\n') assert RC_NUMBER_PROMPT.format('12') in result.output assert result.exit_code == 1, result.output @@ -227,12 +271,8 @@ def test_abort_if_tag_less_than_one(ddev, git): ], ) def test_abort_valid_rc(ddev, git, no_confirm): - """ - The RC is fine but we don't confirm in the end, so we abort. - """ git.tags.return_value = [] - - result = ddev('release', 'branch', 'tag', input=f'\n{no_confirm}\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'\n{no_confirm}\n') assert RC_NUMBER_PROMPT.format('1') in result.output assert result.exit_code == 1, result.output @@ -243,7 +283,6 @@ def test_abort_valid_rc(ddev, git, no_confirm): 'rc_num_input, rc_num', [ pytest.param('', '1', id='implicit sequential'), - pytest.param('', '1', id='explicit sequential'), pytest.param('2', '2', id='explicit non-sequential'), ], ) @@ -251,12 +290,9 @@ def test_abort_valid_rc(ddev, git, no_confirm): def test_first_rc(ddev, git, rc_num_input, rc_num, tags, patch): """ First RC for a new release. - - We support starting with a number other than 1, though that's very unlikely to happen in practice. """ git.tags.return_value = tags - - result = ddev('release', 'branch', 'tag', input=f'{rc_num_input}\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num_input}\ny\n') _assert_tag_pushed(git, result, f'7.56.{patch}-rc.{rc_num}') assert RC_NUMBER_PROMPT.format('1') in result.output @@ -270,19 +306,154 @@ def test_first_rc(ddev, git, rc_num_input, rc_num, tags, patch): ], ) def test_final(ddev, git, latest_final_tag, expected_new_final_tag): - """ - Create final release tag. - """ git.tags.return_value.append(latest_final_tag) - result = ddev('release', 'branch', 'tag', '--final', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') _assert_tag_pushed(git, result, expected_new_final_tag) +def test_rc_with_explicit_value(ddev, git): + """ + `--rc N` pins the RC number without prompting. + """ + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '12', input='y\n') + _assert_tag_pushed(git, result, '7.56.0-rc.12') + assert RC_NUMBER_PROMPT.format('12') not in result.output + + +def test_rc_explicit_value_skips_ahead_warns(ddev, git): + """ + `--rc N` with N > expected_next emits a gap warning but proceeds. + """ + # Last RC is 11 (per EXAMPLE_TAGS), so expected next is 12. Skipping to 15. + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '15', input='y\n') + + _assert_tag_pushed(git, result, '7.56.0-rc.15') + assert 'skips ahead' in result.output + assert 'Missing RC number(s): 12, 13, 14' in result.output + assert '--rc 12 --ref' in result.output + + +def test_rc_explicit_value_no_gap_no_warning(ddev, git): + """ + `--rc N` matching the expected next number does not emit a gap warning. + """ + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '12', input='y\n') + _assert_tag_pushed(git, result, '7.56.0-rc.12') + assert 'skips ahead' not in result.output + + +@pytest.mark.parametrize('rc_arg', ['--rc=banana', '--rc=0']) +def test_rc_invalid_value_aborts(ddev, git, rc_arg): + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', rc_arg) + assert result.exit_code != 0, result.output + assert '`--rc` value must be a positive integer' in result.output + + +def test_final_and_rc_mutually_exclusive(ddev, git): + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--rc', '3') + assert result.exit_code != 0, result.output + assert 'mutually exclusive' in result.output + + +def test_yes_skips_all_confirmations(ddev, git): + """ + `--yes` skips both the "tag this release?" prompt and the final confirm. + Also skips the RC-number prompt by using the suggested value. + """ + result = ddev('release', 'branch', 'tag', '--yes') + _assert_tag_pushed(git, result, '7.56.0-rc.12') + assert RC_NUMBER_PROMPT.format('12') not in result.output + assert 'auto-yes' in result.output + assert 'Using auto-suggested RC number: 12' in result.output + + +def test_yes_with_pinned_rc(ddev, git): + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '20', '--yes') + _assert_tag_pushed(git, result, '7.56.0-rc.20') + assert 'skips ahead' in result.output + + +def _make_ref_dispatcher(rev_parse=None, is_ancestor=None): + """Build a `capture` side_effect that dispatches by subcommand for --ref tests. + + Delegates to `_capture_dispatch` for any subcommand it doesn't explicitly handle so the + default `ls-remote` payload stays defined in exactly one place. + """ + + def dispatch(*args): + sub = args[0] if args else '' + if sub == 'rev-parse': + if isinstance(rev_parse, BaseException): + raise rev_parse + return rev_parse + if sub == 'merge-base': + if isinstance(is_ancestor, BaseException): + raise is_ancestor + return is_ancestor + return _capture_dispatch(*args) + + return dispatch + + +def test_ref_validates_and_tags_at_commit(ddev, git): + """ + `--ref ` tags that commit instead of the branch tip. + """ + git.capture.side_effect = _make_ref_dispatcher(rev_parse='cafef00d\n', is_ancestor='') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--ref', 'cafef00d', input='y\n') + + assert result.exit_code == 0, result.output + assert 'at cafef00d?' in result.output + assert git.method_calls[-2:] == [ + c.tag('7.56.0', message='7.56.0', ref='cafef00d'), + c.push('7.56.0'), + ] + + +def test_ref_does_not_resolve_aborts(ddev, git): + git.capture.side_effect = _make_ref_dispatcher(rev_parse=OSError('bad ref')) + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--ref', 'nope', input='y\n') + assert result.exit_code == 1, result.output + assert 'does not resolve to a commit' in result.output + + +def test_ref_not_ancestor_aborts(ddev, git): + git.capture.side_effect = _make_ref_dispatcher(rev_parse='badf00d\n', is_ancestor=OSError('not ancestor')) + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--ref', 'badf00d', input='y\n') + assert result.exit_code == 1, result.output + assert 'is not an ancestor of' in result.output + + +def test_no_worktree_subprocess_invoked(ddev, git): + """ + The command must never touch `git worktree`. Previously it created a worktree to check out + `origin/` for tagging; now it operates against the ref directly. Worktree ops can + go through either `run` (add/remove) or `capture` (list), so both are filtered. + """ + git.current_branch.return_value = 'master' + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') + + assert result.exit_code == 0, result.output + worktree_calls = [ + call for call in git.method_calls if call[0] in ('run', 'capture') and call.args and call.args[0] == 'worktree' + ] + assert worktree_calls == [] + + +def test_local_release_branch_not_pulled(ddev, git): + """ + The command must never pull the user's local release branch. Tagging operates against + `origin/` only, so the user's local checkout state is irrelevant. + """ + ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') + git.pull.assert_not_called() + + def test_build_agent_yaml_already_updated_does_not_dispatch_workflow(ddev, git, mocker): dispatch_workflow = mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow') - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='y\n') _assert_tag_pushed(git, result, '7.56.0') dispatch_workflow.assert_not_called() @@ -294,14 +465,14 @@ def test_build_agent_yaml_points_to_main_warns_and_continues(ddev, basic_git, mo mocker.patch('ddev.cli.release.branch.tag._build_agent_yaml_points_to_main', return_value=True) dispatch_workflow = mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow') - result = ddev('release', 'branch', 'tag', '--skip-open-pr-check', input='\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--skip-open-pr-check', input='\ny\n') assert result.exit_code == 0, result.output assert '`.gitlab/build_agent.yaml` still points to `main`' in result.output assert 'Dispatched `update-build-agent-yaml.yml`' in result.output assert 'Tagging will continue.' in result.output dispatch_workflow.assert_called_once_with('update-build-agent-yaml.yml', 'master', {'branch': '7.56.x'}) - basic_git.tag.assert_called_once_with('7.56.0-rc.1', message='7.56.0-rc.1') + basic_git.tag.assert_called_once_with('7.56.0-rc.1', message='7.56.0-rc.1', ref=ORIGIN_REF) basic_git.push.assert_called_once_with('7.56.0-rc.1') @@ -311,7 +482,7 @@ def test_build_agent_yaml_workflow_dispatch_waits_for_tag_confirmation(ddev, bas mocker.patch('ddev.cli.release.branch.tag._build_agent_yaml_points_to_main', return_value=True) dispatch_workflow = mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow') - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='n\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='n\n') assert result.exit_code == 1, result.output assert NO_CONFIRMATION_SO_ABORT in result.output @@ -330,15 +501,12 @@ def test_build_agent_yaml_workflow_dispatch_failure_warns_and_continues(ddev, ba ), ) - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='y\n') assert result.exit_code == 0, result.output assert 'Warning: unable to trigger `update-build-agent-yaml.yml`: API error' in result.output assert 'gh workflow run update-build-agent-yaml.yml -f branch=7.56.x' in result.output assert 'Dispatched `update-build-agent-yaml.yml`' not in result.output dispatch_workflow.assert_called_once_with('update-build-agent-yaml.yml', 'master', {'branch': '7.56.x'}) - basic_git.tag.assert_called_once_with('7.56.0', message='7.56.0') + basic_git.tag.assert_called_once_with('7.56.0', message='7.56.0', ref=ORIGIN_REF) basic_git.push.assert_called_once_with('7.56.0') - - -# TODO: test for adding RCs for a bugfix release diff --git a/ddev/tests/cli/release/test_agent/__init__.py b/ddev/tests/cli/release/test_agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/cli/release/test_agent/test_command.py b/ddev/tests/cli/release/test_agent/test_command.py new file mode 100644 index 0000000000000..a726efa3b871c --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_command.py @@ -0,0 +1,373 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations + +import httpx +import pytest +from pytest_mock import MockerFixture + +from tests.helpers.github_async import DEFAULT_DISPATCH_HTML_URL, FakeAsyncGitHubClient +from tests.helpers.runner import CliRunner + +EXPECTED_INPUTS = { + 'test-py3': 'true', + 'test-py2': 'false', + 'agent-image': 'registry.datadoghq.com/agent:7.80.0-rc.3', + 'agent-image-windows': 'registry.datadoghq.com/agent:7.80.0-rc.3-servercore', +} + + +@pytest.fixture(autouse=True) +def _silence_git(mocker: MockerFixture) -> None: + """Default git mocks: `fetch_target` succeeds and `git show` returns workflow yaml.""" + mocker.patch('ddev.utils.git.GitRepository.run', return_value=None) + mocker.patch('ddev.utils.git.GitRepository.show_file', return_value='workflow yaml') + + +@pytest.fixture(autouse=True) +def _silence_registry(mocker: MockerFixture) -> None: + """Default registry mocks: every manifest exists and there is one RC tag available.""" + mocker.patch('ddev.cli.release.test_agent.registry.manifest_exists', return_value=True) + mocker.patch( + 'ddev.cli.release.test_agent.registry.list_agent_rc_tags', + return_value=['7.80.0-rc.1', '7.80.0-rc.3'], + ) + + +@pytest.mark.parametrize( + 'args, expected', + [ + pytest.param([], 'Exactly one of --branch or --tag', id='neither'), + pytest.param(['--branch', '7.80.x', '--tag', '7.80.0'], 'Cannot use --branch and --tag together', id='both'), + pytest.param(['--branch', '7.80'], 'Invalid branch', id='bad-branch'), + pytest.param(['--tag', '7.80'], 'Invalid tag', id='bad-tag'), + ], +) +def test_input_validation(ddev: CliRunner, args: list[str], expected: str) -> None: + result = ddev('release', 'test-agent', *args) + assert result.exit_code != 0, result.output + assert expected in result.output + + +def test_tag_with_leading_v_is_accepted(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient) -> None: + result = ddev('release', 'test-agent', '--tag', 'v7.80.0-rc.1', '--yes') + assert result.exit_code == 0, result.output + call = fake_async_github.last_call('create_workflow_dispatch') + assert call.kwargs['inputs']['agent-image'] == 'registry.datadoghq.com/agent:7.80.0-rc.1' + + +@pytest.mark.parametrize('workflow_id', ['test-agent.yml', 'test-agent-windows.yml']) +def test_branch_resolves_latest_rc_dispatches_both( + ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient, workflow_id: str +) -> None: + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code == 0, result.output + fake_async_github.assert_called_with( + 'create_workflow_dispatch', + owner='DataDog', + repo='integrations-core', + workflow_id=workflow_id, + ref='7.80.x', + inputs=EXPECTED_INPUTS, + timeout=None, + return_run_details=True, + ) + + +@pytest.mark.parametrize('workflow_id', ['test-agent.yml', 'test-agent-windows.yml']) +def test_tag_dispatches_both(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient, workflow_id: str) -> None: + """A bare `--tag` (no `v` prefix, no `-rc` suffix) must drive the same dispatch shape as `--branch`.""" + result = ddev('release', 'test-agent', '--tag', '7.80.0', '--yes') + + assert result.exit_code == 0, result.output + fake_async_github.assert_called_with( + 'create_workflow_dispatch', + owner='DataDog', + repo='integrations-core', + workflow_id=workflow_id, + ref='7.80.0', + inputs={ + 'test-py3': 'true', + 'test-py2': 'false', + 'agent-image': 'registry.datadoghq.com/agent:7.80.0', + 'agent-image-windows': 'registry.datadoghq.com/agent:7.80.0-servercore', + }, + timeout=None, + return_run_details=True, + ) + + +def test_dry_run_does_not_dispatch(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient) -> None: + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--dry-run') + + assert result.exit_code == 0, result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + assert 'Dry run' in result.output + + +def test_monitor_invokes_workflow_monitor( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, +) -> None: + monitor = mocker.patch('ddev.cli.release.test_agent.monitoring.monitor_dispatched_workflows', return_value=None) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes', '--monitor') + + assert result.exit_code == 0, result.output + monitor.assert_called_once() + _, token = monitor.call_args.args + assert token == 'ghp_test' + workflows = monitor.call_args.kwargs['workflows'] + assert monitor.call_args.kwargs['poll_interval'] == 10.0 + assert [workflow.label for workflow in workflows] == ['Linux', 'Windows'] + assert [workflow.html_url for workflow in workflows] == [DEFAULT_DISPATCH_HTML_URL, DEFAULT_DISPATCH_HTML_URL] + assert 'Workflows dispatched' not in result.output + assert DEFAULT_DISPATCH_HTML_URL not in result.output + + +def test_monitor_uses_requested_poll_interval( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, +) -> None: + monitor = mocker.patch('ddev.cli.release.test_agent.monitoring.monitor_dispatched_workflows', return_value=None) + + result = ddev( + 'release', + 'test-agent', + '--branch', + '7.80.x', + '--yes', + '--monitor', + '--poll-interval', + '15', + ) + + assert result.exit_code == 0, result.output + assert monitor.call_args.kwargs['poll_interval'] == 15.0 + + +def test_monitor_error_aborts_command( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, +) -> None: + mocker.patch( + 'ddev.cli.release.test_agent.monitoring.monitor_dispatched_workflows', + side_effect=RuntimeError('GitHub API unavailable'), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes', '--monitor') + + assert result.exit_code != 0, result.output + assert 'Failed to monitor workflows: GitHub API unavailable' in result.output + + +def test_branch_with_no_rcs_aborts( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + mocker.patch('ddev.cli.release.test_agent.registry.list_agent_rc_tags', return_value=[]) + + result = ddev('release', 'test-agent', '--branch', '7.99.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'No `7.99.*-rc.*` tags found' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_missing_image_aborts(ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient) -> None: + mocker.patch('ddev.cli.release.test_agent.registry.manifest_exists', return_value=False) + + result = ddev('release', 'test-agent', '--tag', '9.99.0-rc.1', '--yes') + + assert result.exit_code != 0, result.output + assert 'not found in registry.datadoghq.com' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_missing_ref_aborts(ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient) -> None: + """When `git fetch` reports the ref does not exist on origin, surface a clean abort.""" + mocker.patch( + 'ddev.utils.git.GitRepository.run', + side_effect=OSError("fatal: couldn't find remote ref refs/heads/7.80.x"), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Branch `7.80.x` not found on origin' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_fetch_target_is_called_with_branch_refspec( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """The command must fetch the branch into `origin/` before reading workflow files.""" + run_spy = mocker.patch('ddev.utils.git.GitRepository.run', return_value=None) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code == 0, result.output + run_spy.assert_any_call('fetch', '--quiet', '--depth=1', 'origin', '+refs/heads/7.80.x:refs/remotes/origin/7.80.x') + + +def test_fetch_target_is_called_with_tag_refspec( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """A `--tag` invocation must fetch into `refs/tags/` so `git show :path` works.""" + run_spy = mocker.patch('ddev.utils.git.GitRepository.run', return_value=None) + + result = ddev('release', 'test-agent', '--tag', '7.80.0-rc.1', '--yes') + + assert result.exit_code == 0, result.output + run_spy.assert_any_call('fetch', '--quiet', '--depth=1', 'origin', 'refs/tags/7.80.0-rc.1:refs/tags/7.80.0-rc.1') + + +def test_fetch_target_other_error_surfaces_original( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """Anything other than 'remote ref not found' must surface as a generic fetch-failed abort.""" + mocker.patch( + 'ddev.utils.git.GitRepository.run', + side_effect=OSError('fatal: unable to access https://github.com/...: Could not resolve host'), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Failed to fetch branch `7.80.x` from origin' in result.output + assert 'Could not resolve host' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_missing_workflow_file_aborts( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """A 'file exists on disk, but not in ' git error reads as the workflow really being absent.""" + mocker.patch( + 'ddev.utils.git.GitRepository.show_file', + side_effect=OSError( + "fatal: path '.github/workflows/test-agent.yml' exists on disk, but not in 'origin/7.80.x'" + ), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'missing required workflow file' in result.output + assert '7.80.x' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_unknown_git_failure_surfaces_original_error( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """Anything we don't recognise from git must bubble up — never silently look like a missing workflow.""" + mocker.patch( + 'ddev.utils.git.GitRepository.show_file', + side_effect=OSError('fatal: index file corrupt'), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Failed to read' in result.output + assert 'index file corrupt' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +@pytest.mark.parametrize( + 'failing_workflow, failing_label', + [ + pytest.param('test-agent-windows.yml', 'Windows', id='windows-fails'), + pytest.param('test-agent.yml', 'Linux', id='linux-fails'), + ], +) +def test_partial_dispatch_failure_surfaces_sibling_url( + ddev: CliRunner, + fake_async_github: FakeAsyncGitHubClient, + failing_workflow: str, + failing_label: str, +) -> None: + """When only one dispatch fails, the surviving side's URL must appear in the error message.""" + err = httpx.HTTPStatusError( + 'forbidden', + request=httpx.Request('POST', 'https://api.github.com/'), + response=httpx.Response(403), + ) + fake_async_github.mock_response('create_workflow_dispatch', err, workflow_id=failing_workflow) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert f'{failing_label} dispatch failed' in result.output + assert DEFAULT_DISPATCH_HTML_URL in result.output + + +def test_both_dispatches_fail_combine_messages(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient) -> None: + """Both-fail must announce itself explicitly and surface both error reprs, not just substrings. + + Asserting on `forbidden` appearing twice catches the previous regression where `add_note` + was used to attach the Windows error — `str(exc)` does not include notes, so the Windows + side was silently dropped from the abort message. + """ + err = httpx.HTTPStatusError( + 'forbidden', + request=httpx.Request('POST', 'https://api.github.com/'), + response=httpx.Response(403), + ) + fake_async_github.mock_response('create_workflow_dispatch', err) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Both dispatches failed' in result.output + assert 'Linux:' in result.output + assert 'Windows:' in result.output + assert result.output.count('forbidden') == 2 + + +def test_missing_github_token_aborts(ddev: CliRunner, mocker: MockerFixture, config_file) -> None: + """The empty-token guard must abort before any dispatch attempt.""" + config_file.model.github.token = '' + config_file.save() + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'GitHub token required' in result.output + + +@pytest.mark.parametrize( + 'mock_target, expected_message', + [ + pytest.param( + 'ddev.cli.release.test_agent.registry.list_agent_rc_tags', + 'Failed to query registry.datadoghq.com for tags', + id='list_tags', + ), + pytest.param( + 'ddev.cli.release.test_agent.registry.manifest_exists', + 'Failed to query registry.datadoghq.com for', + id='manifest_exists', + ), + ], +) +def test_registry_http_error_aborts_cleanly( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, + mock_target: str, + expected_message: str, +) -> None: + """A transient httpx error from the registry must surface via app.abort, not a raw traceback.""" + mocker.patch(mock_target, side_effect=httpx.ConnectError('connection refused')) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert expected_message in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') diff --git a/ddev/tests/cli/release/test_agent/test_dispatch.py b/ddev/tests/cli/release/test_agent/test_dispatch.py new file mode 100644 index 0000000000000..f8bc99a0f1401 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_dispatch.py @@ -0,0 +1,94 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Direct unit tests for `dispatch.extract_dispatched_workflows`. + +The CLI-level tests in `test_command.py` only ever raise `httpx.HTTPStatusError` (an +`Exception`) from the dispatch call, so they do not exercise the +`BaseException`-but-not-`Exception` re-raise guard that lets `CancelledError` and +`KeyboardInterrupt` propagate. These tests pin that contract on the helper directly. +""" + +from __future__ import annotations + +import asyncio +from typing import Any + +import pytest + +from ddev.cli.release.test_agent.dispatch import extract_dispatched_workflows +from ddev.utils.github_async import GitHubResponse +from ddev.utils.github_async.models import WorkflowDispatchResult + + +def _ok(run_id: int, html_url: str) -> GitHubResponse[WorkflowDispatchResult]: + return GitHubResponse( + data=WorkflowDispatchResult( + workflow_run_id=run_id, + run_url=f'https://api.github.com/{html_url}', + html_url=html_url, + ), + headers={}, + ) + + +def test_extract_dispatched_workflows_returns_both_runs() -> None: + results = (_ok(1, 'https://github.com/x/runs/1'), _ok(2, 'https://github.com/x/runs/2')) + + linux, windows = extract_dispatched_workflows(results) + assert linux.label == 'Linux' + assert linux.workflow_id == 'test-agent.yml' + assert linux.run_id == 1 + assert linux.html_url == 'https://github.com/x/runs/1' + assert windows.label == 'Windows' + assert windows.workflow_id == 'test-agent-windows.yml' + assert windows.run_id == 2 + assert windows.html_url == 'https://github.com/x/runs/2' + + +@pytest.mark.parametrize( + 'flow_control_exc', + [ + pytest.param(asyncio.CancelledError('user pressed ctrl-c'), id='cancelled-error'), + pytest.param(KeyboardInterrupt(), id='keyboard-interrupt'), + ], +) +@pytest.mark.parametrize( + 'position', + [ + pytest.param('linux', id='linux-cancelled'), + pytest.param('windows', id='windows-cancelled'), + ], +) +def test_extract_dispatched_workflows_reraises_flow_control_exceptions( + flow_control_exc: BaseException, position: str +) -> None: + """`CancelledError` / `KeyboardInterrupt` in either slot must propagate verbatim, not be wrapped.""" + other = _ok(1, 'https://github.com/x/runs/sibling') + results: tuple[Any, Any] = (flow_control_exc, other) if position == 'linux' else (other, flow_control_exc) + + with pytest.raises(type(flow_control_exc)): + extract_dispatched_workflows(results) + + +def test_extract_dispatched_workflows_wraps_regular_exception_failure_as_runtime_error() -> None: + """Sanity check that the `Exception` branch still produces a wrapped RuntimeError.""" + err = RuntimeError('forbidden') + results = (err, _ok(2, 'https://github.com/x/runs/2')) + + with pytest.raises(RuntimeError, match=r'Linux dispatch failed:.*forbidden.*runs/2'): + extract_dispatched_workflows(results) + + +def test_extract_dispatched_workflows_both_failures_includes_both_reprs() -> None: + """Both-fail must surface both error reprs in the RuntimeError message (not via __notes__).""" + linux_err = RuntimeError('linux-side detail') + windows_err = RuntimeError('windows-side detail') + + with pytest.raises(RuntimeError) as excinfo: + extract_dispatched_workflows((linux_err, windows_err)) + + message = str(excinfo.value) + assert 'Both dispatches failed' in message + assert 'linux-side detail' in message + assert 'windows-side detail' in message diff --git a/ddev/tests/cli/release/test_agent/test_monitoring.py b/ddev/tests/cli/release/test_agent/test_monitoring.py new file mode 100644 index 0000000000000..1e65466198af9 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_monitoring.py @@ -0,0 +1,312 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations + +import importlib +from typing import Any + +from ddev.cli.release.test_agent.dispatch import DispatchedWorkflow +from ddev.cli.release.test_agent.monitoring import ( + JobState, + MonitorState, + WorkflowState, + collect_monitor_state, + monitor_workflows, + render_monitor_panel, +) +from ddev.cli.terminal import Terminal +from ddev.utils.github_async import GitHubResponse +from ddev.utils.github_async.models import WorkflowJob, WorkflowJobsList, WorkflowRun +from tests.helpers.github_async import FakeAsyncGitHubClient + +monitoring_module = importlib.import_module('ddev.cli.release.test_agent.monitoring') + + +def test_render_monitor_panel_shows_workflow_summary() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + state = MonitorState( + workflows=[ + WorkflowState( + label='Linux', + run_id=1, + status='in_progress', + conclusion=None, + html_url='https://github.com/DataDog/integrations-core/actions/runs/1', + jobs=[ + JobState( + name='test-agent / py3.12', + status='in_progress', + conclusion=None, + html_url='https://github.com/DataDog/integrations-core/actions/runs/1/job/10', + ) + ], + ) + ] + ) + + with terminal.console.capture() as capture: + terminal.console.print(render_monitor_panel(terminal, ref='7.80.x', poll_interval=30, state=state)) + + output = capture.get() + assert 'Agent test workflow monitor' in output + assert 'Ref: 7.80.x' in output + assert 'Polling: every 30s' in output + assert 'Linux' in output + assert 'Waiting: 0' in output + assert 'Running: 1' in output + assert 'Succeeded: 0' in output + assert 'Failed: 0' in output + assert 'Skipped: 0' in output + + +def test_render_monitor_panel_includes_failed_job_url() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + job_url = 'https://github.com/DataDog/integrations-core/actions/runs/2/job/20' + state = MonitorState( + workflows=[ + WorkflowState( + label='Windows', + run_id=2, + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/2', + jobs=[ + JobState( + name='test-agent-windows / py3.12', + status='completed', + conclusion='failure', + html_url=job_url, + ) + ], + ) + ] + ) + + with terminal.console.capture() as capture: + terminal.console.print(render_monitor_panel(terminal, ref='7.80.x', poll_interval=30, state=state)) + + output = capture.get() + assert 'Windows' in output + assert 'Failures' in output + assert 'test-agent-windows / py3.12' in output + assert 'actions/runs/2/job/' in output + assert '20' in output + + +def test_render_monitor_panel_counts_skipped_jobs_without_listing_them() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + state = MonitorState( + workflows=[ + WorkflowState( + label='Linux', + run_id=1, + status='completed', + conclusion='success', + html_url='https://github.com/DataDog/integrations-core/actions/runs/1', + jobs=[ + JobState('successful job', 'completed', 'success', None), + JobState('skipped job', 'completed', 'skipped', None), + JobState('failed job', 'completed', 'failure', 'https://github.com/job/failed'), + ], + ) + ] + ) + + with terminal.console.capture() as capture: + terminal.console.print(render_monitor_panel(terminal, ref='7.80.x', poll_interval=30, state=state)) + + output = capture.get() + assert 'Skipped: 1' in output + assert 'Failed: 1' in output + assert 'failed job' in output + assert 'skipped job' not in output + assert 'successful job' not in output + + +def test_monitor_state_is_complete_only_when_all_workflows_complete() -> None: + state = MonitorState( + workflows=[ + WorkflowState('Linux', 1, 'completed', 'success', 'https://github.com/runs/1', []), + WorkflowState('Windows', 2, 'in_progress', None, 'https://github.com/runs/2', []), + ] + ) + + assert not state.is_complete + + completed = MonitorState( + workflows=[ + WorkflowState('Linux', 1, 'completed', 'success', 'https://github.com/runs/1', []), + WorkflowState('Windows', 2, 'completed', 'failure', 'https://github.com/runs/2', []), + ] + ) + + assert completed.is_complete + + +def test_dispatched_workflow_has_run_metadata() -> None: + workflow = DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + + assert workflow.run_id == 123 + + +async def test_collect_monitor_state_fetches_workflow_jobs(fake_async_github: FakeAsyncGitHubClient) -> None: + fake_async_github.mock_response( + 'get_workflow_run', + WorkflowRun( + id=123, + name='test-agent', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ), + ) + fake_async_github.mock_response( + 'list_workflow_jobs', + WorkflowJobsList( + total_count=1, + jobs=[ + WorkflowJob( + id=456, + run_id=123, + name='test-agent / py3.12', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123/job/456', + ) + ], + ), + ) + + state = await collect_monitor_state( + fake_async_github, + [ + DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + ], + ) + + assert state.is_complete + assert state.workflows[0].conclusion == 'failure' + assert ( + state.workflows[0].jobs[0].html_url == 'https://github.com/DataDog/integrations-core/actions/runs/123/job/456' + ) + + +async def test_monitor_workflows_does_not_raise_on_job_failure(fake_async_github: FakeAsyncGitHubClient) -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + fake_async_github.mock_response( + 'get_workflow_run', + GitHubResponse( + data=WorkflowRun( + id=123, + name='test-agent', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ), + headers={}, + ), + ) + fake_async_github.mock_response( + 'list_workflow_jobs', + GitHubResponse( + data=WorkflowJobsList( + total_count=1, + jobs=[ + WorkflowJob( + id=456, + run_id=123, + name='test-agent / py3.12', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123/job/456', + ) + ], + ), + headers={}, + ), + ) + + await monitor_workflows( + terminal, + fake_async_github, + ref='7.80.x', + workflows=[ + DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + ], + poll_interval=0, + ) + + +async def test_monitor_workflows_does_not_use_alternate_screen( + fake_async_github: FakeAsyncGitHubClient, monkeypatch +) -> None: + terminal = Terminal(verbosity=0, enable_color=True, interactive=True) + live_kwargs: dict[str, Any] = {} + + class FakeLive: + def __init__(self, *args: Any, **kwargs: Any) -> None: + live_kwargs.update(kwargs) + + def __enter__(self) -> FakeLive: + return self + + def __exit__(self, *args: Any) -> None: + pass + + def update(self, *args: Any, **kwargs: Any) -> None: + pass + + monkeypatch.setattr(monitoring_module, 'Live', FakeLive) + fake_async_github.mock_response( + 'get_workflow_run', + GitHubResponse( + data=WorkflowRun( + id=123, + name='test-agent', + status='completed', + conclusion='success', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ), + headers={}, + ), + ) + fake_async_github.mock_response( + 'list_workflow_jobs', + GitHubResponse(data=WorkflowJobsList(total_count=0, jobs=[]), headers={}), + ) + + await monitor_workflows( + terminal, + fake_async_github, + ref='7.80.x', + workflows=[ + DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + ], + poll_interval=0, + ) + + assert live_kwargs.get('screen') is not True + assert live_kwargs['auto_refresh'] is True + assert live_kwargs['refresh_per_second'] == 10 + assert live_kwargs['vertical_overflow'] == 'crop' diff --git a/ddev/tests/cli/release/test_agent/test_registry.py b/ddev/tests/cli/release/test_agent/test_registry.py new file mode 100644 index 0000000000000..cd9018b2e8283 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_registry.py @@ -0,0 +1,90 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Tests for the agent-specific wrapper around `ddev.utils.docker_registry`. + +The registry transport itself is covered by `tests/utils/test_docker_registry.py`. +These tests focus on the RC filter and version sort layered on top. +""" + +from __future__ import annotations + +import httpx +import pytest +from pytest_mock import MockerFixture + +from ddev.cli.release.test_agent.registry import list_agent_rc_tags, manifest_exists + + +def test_manifest_exists_delegates_to_utility(mocker: MockerFixture) -> None: + spy = mocker.patch('ddev.utils.docker_registry.manifest_exists', return_value=True) + + assert manifest_exists('7.80.0-rc.1') is True + spy.assert_called_once_with('agent', '7.80.0-rc.1', timeout=10.0) + + +def test_list_agent_rc_tags_filters_and_sorts(mocker: MockerFixture) -> None: + mocker.patch( + 'ddev.utils.docker_registry.list_tags', + return_value=[ + '7.79.0-rc.1', + '7.80.0-rc.10', + '7.80.0-rc.2', + '7.80.0-rc.1', + '7.80.0', + '7.80.0-rc.1-servercore', + '7-rc', + 'latest', + ], + ) + + assert list_agent_rc_tags(7, 80) == ['7.80.0-rc.1', '7.80.0-rc.2', '7.80.0-rc.10'] + + +def test_list_agent_rc_tags_includes_later_patch_rcs(mocker: MockerFixture) -> None: + mocker.patch( + 'ddev.utils.docker_registry.list_tags', + return_value=[ + '7.79.0-rc.1', + '7.79.0-rc.8', + '7.79.1-rc.1', + '7.79.1-rc.2', + '7.79.0', + '7.79.1-rc.1-servercore', + '7.80.0-rc.1', + ], + ) + + assert list_agent_rc_tags(7, 79) == [ + '7.79.0-rc.1', + '7.79.0-rc.8', + '7.79.1-rc.1', + '7.79.1-rc.2', + ] + + +@pytest.mark.parametrize( + 'all_tags', + [ + pytest.param([], id='empty-list'), + pytest.param(['7.79.0-rc.1', '7.81.0-rc.1', 'latest'], id='no-matching-minor'), + ], +) +def test_list_agent_rc_tags_returns_empty_when_no_match(mocker: MockerFixture, all_tags: list[str]) -> None: + mocker.patch('ddev.utils.docker_registry.list_tags', return_value=all_tags) + + assert list_agent_rc_tags(7, 80) == [] + + +@pytest.mark.parametrize( + 'exc', + [ + pytest.param(httpx.ConnectError('connection refused'), id='connect-error'), + pytest.param(httpx.ReadTimeout('read timed out'), id='read-timeout'), + ], +) +def test_list_agent_rc_tags_propagates_network_errors(mocker: MockerFixture, exc: Exception) -> None: + mocker.patch('ddev.utils.docker_registry.list_tags', side_effect=exc) + + with pytest.raises(type(exc)): + list_agent_rc_tags(7, 80) diff --git a/ddev/tests/cli/test_terminal.py b/ddev/tests/cli/test_terminal.py new file mode 100644 index 0000000000000..5fcbfc51f1da8 --- /dev/null +++ b/ddev/tests/cli/test_terminal.py @@ -0,0 +1,42 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations + +from rich.text import Text + +from ddev.cli.terminal import Terminal + + +def test_labeled_text_formats_label_and_value() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + + text = terminal.labeled_text('Workflows', 'test-agent.yml, test-agent-windows.yml') + + assert text.plain == 'Workflows: test-agent.yml, test-agent-windows.yml' + assert text.spans[0].start == 0 + assert text.spans[0].end == len('Workflows: ') + + +def test_labeled_text_accepts_rich_value() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + value = Text('https://github.com/DataDog/integrations-core/actions/runs/1', style='link https://github.com') + + text = terminal.labeled_text('Linux', value) + + assert text.plain == 'Linux: https://github.com/DataDog/integrations-core/actions/runs/1' + assert any(span.style == 'link https://github.com' for span in text.spans) + + +def test_labeled_lines_aligns_labels() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + + text = terminal.labeled_lines( + [ + ('Ref', '7.80.x'), + ('Resolved RC', '7.80.0-rc.3'), + ], + indent=' ', + ) + + assert text.plain == ' Ref: 7.80.x\n Resolved RC: 7.80.0-rc.3' diff --git a/ddev/tests/helpers/github_async.py b/ddev/tests/helpers/github_async.py index 2986a7bbf44b4..28ad713ed6e11 100644 --- a/ddev/tests/helpers/github_async.py +++ b/ddev/tests/helpers/github_async.py @@ -37,6 +37,7 @@ def test_thing(fake_async_github): from collections.abc import AsyncIterator, Callable from dataclasses import dataclass, field +from pathlib import Path from typing import Any import httpx @@ -48,9 +49,14 @@ def test_thing(fake_async_github): Label, PullRequest, WorkflowDispatchResult, + WorkflowJobsList, WorkflowRun, ) +# Stable URL baked into the default `create_workflow_dispatch` response. Exported so tests +# that assert on the URL can reference the helper rather than duplicating the literal. +DEFAULT_DISPATCH_HTML_URL = 'https://github.com/test/repo/actions/runs/1' + @dataclass class RecordedRequest: @@ -86,7 +92,11 @@ def _default_response_factories() -> dict[str, Callable[[], Any]]: response=httpx.Response(404), ), 'create_workflow_dispatch': lambda: GitHubResponse( - data=WorkflowDispatchResult(workflow_run_id=123), + data=WorkflowDispatchResult( + workflow_run_id=123, + run_url='https://api.github.com/repos/test/repo/actions/runs/123', + html_url=DEFAULT_DISPATCH_HTML_URL, + ), headers={}, ), # Default to a completed/successful run so happy-path tests don't have to register one. @@ -127,6 +137,11 @@ def _default_response_factories() -> dict[str, Callable[[], Any]]: data=ArtifactsList(total_count=0, artifacts=[]), headers={}, ), + # An empty page; tests that need jobs register their own WorkflowJobsList. + 'list_workflow_jobs': lambda: GitHubResponse( + data=WorkflowJobsList(total_count=0, jobs=[]), + headers={}, + ), # Download is a side-effecting no-op by default; per-URL failures are registered explicitly. 'download_artifact': lambda: None, } @@ -253,7 +268,9 @@ async def create_workflow_dispatch( ref: str, inputs: dict[str, str] | None = None, timeout: float | None = None, - ) -> GitHubResponse[WorkflowDispatchResult]: + *, + return_run_details: bool = False, + ) -> GitHubResponse[Any]: return self._call( 'create_workflow_dispatch', owner=owner, @@ -262,6 +279,7 @@ async def create_workflow_dispatch( ref=ref, inputs=inputs, timeout=timeout, + return_run_details=return_run_details, ) async def get_workflow_run( @@ -357,6 +375,36 @@ async def list_workflow_run_artifacts( else: yield GitHubResponse.model_validate({'data': page, 'headers': {}}) + async def list_workflow_jobs( + self, + owner: str, + repo: str, + run_id: int, + per_page: int = 30, + timeout: float | None = None, + ) -> AsyncIterator[GitHubResponse[WorkflowJobsList]]: + """Async-generator mirror. A registered response may be a single page or a list of pages.""" + self._record( + 'list_workflow_jobs', + owner=owner, + repo=repo, + run_id=run_id, + per_page=per_page, + timeout=timeout, + ) + response = self._resolve_response( + 'list_workflow_jobs', + {'owner': owner, 'repo': repo, 'run_id': run_id, 'per_page': per_page, 'timeout': timeout}, + ) + if isinstance(response, BaseException): + raise response + pages = response if isinstance(response, list) else [response] + for page in pages: + if isinstance(page, GitHubResponse): + yield page + else: + yield GitHubResponse.model_validate({'data': page, 'headers': {}}) + async def download_artifact( self, archive_download_url: str, @@ -376,6 +424,7 @@ async def download_artifact( ) if isinstance(response, BaseException): raise response + Path(dest_path).mkdir(parents=True, exist_ok=True) return None async def aclose(self) -> None: diff --git a/ddev/tests/utils/test_docker_registry.py b/ddev/tests/utils/test_docker_registry.py new file mode 100644 index 0000000000000..810eb79f584f8 --- /dev/null +++ b/ddev/tests/utils/test_docker_registry.py @@ -0,0 +1,169 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Tests for the generic Docker Registry v2 helpers in `ddev.utils.docker_registry`.""" + +from __future__ import annotations + +import httpx +import pytest +from pytest_mock import MockerFixture + +from ddev.utils import docker_registry + + +def _response( + status: int, + *, + json_body: object | None = None, + headers: dict[str, str] | None = None, + method: str = 'GET', + url: str = 'https://registry.datadoghq.com/v2/agent/tags/list', +) -> httpx.Response: + request = httpx.Request(method, url) + kwargs: dict[str, object] = {'request': request} + if json_body is not None: + kwargs['json'] = json_body + if headers is not None: + kwargs['headers'] = headers + return httpx.Response(status, **kwargs) + + +@pytest.mark.parametrize( + 'status_code, expected', + [ + pytest.param(200, True, id='exists'), + pytest.param(404, False, id='missing'), + ], +) +def test_manifest_exists_resolves_status(mocker: MockerFixture, status_code: int, expected: bool) -> None: + mocker.patch('httpx.head', return_value=_response(status_code, method='HEAD')) + + assert docker_registry.manifest_exists('agent', '7.80.0-rc.1') is expected + + +@pytest.mark.parametrize('status', [401, 403, 500, 503]) +def test_manifest_exists_raises_on_other_errors(mocker: MockerFixture, status: int) -> None: + mocker.patch('httpx.head', return_value=_response(status, method='HEAD')) + + with pytest.raises(httpx.HTTPStatusError): + docker_registry.manifest_exists('agent', '7.80.0-rc.1') + + +@pytest.mark.parametrize( + 'exc', + [ + pytest.param(httpx.ConnectError('connection refused'), id='connect-error'), + pytest.param(httpx.ReadTimeout('read timed out'), id='read-timeout'), + pytest.param(httpx.ConnectTimeout('connect timed out'), id='connect-timeout'), + ], +) +def test_manifest_exists_propagates_network_errors(mocker: MockerFixture, exc: Exception) -> None: + mocker.patch('httpx.head', side_effect=exc) + + with pytest.raises(type(exc)): + docker_registry.manifest_exists('agent', '7.80.0-rc.1') + + +def test_manifest_exists_uses_custom_host(mocker: MockerFixture) -> None: + spy = mocker.patch('httpx.head', return_value=_response(200, method='HEAD')) + + docker_registry.manifest_exists('repo', 'tag', host='private.registry.example') + + spy.assert_called_once() + assert spy.call_args.args[0] == 'https://private.registry.example/v2/repo/manifests/tag' + + +def test_list_tags_returns_single_page(mocker: MockerFixture) -> None: + mocker.patch('httpx.get', return_value=_response(200, json_body={'name': 'agent', 'tags': ['a', 'b', 'c']})) + + assert docker_registry.list_tags('agent') == ['a', 'b', 'c'] + + +@pytest.mark.parametrize( + 'payload', + [ + pytest.param({'name': 'agent', 'tags': []}, id='empty-list'), + pytest.param({'name': 'agent', 'tags': None}, id='null-tags'), + pytest.param({'name': 'agent'}, id='missing-key'), + ], +) +def test_list_tags_handles_missing_tags(mocker: MockerFixture, payload: dict[str, object]) -> None: + mocker.patch('httpx.get', return_value=_response(200, json_body=payload)) + + assert docker_registry.list_tags('agent') == [] + + +def test_list_tags_follows_relative_link_header(mocker: MockerFixture) -> None: + """A `Link: ; rel="next"` header on page 1 must be followed before returning.""" + page_1 = _response( + 200, + json_body={'name': 'agent', 'tags': ['a', 'b']}, + headers={'Link': '; rel="next"'}, + ) + page_2 = _response( + 200, + json_body={'name': 'agent', 'tags': ['c', 'd']}, + ) + get = mocker.patch('httpx.get', side_effect=[page_1, page_2]) + + result = docker_registry.list_tags('agent', page_size=2) + + assert result == ['a', 'b', 'c', 'd'] + assert get.call_count == 2 + assert get.call_args_list[1].args[0] == 'https://registry.datadoghq.com/v2/agent/tags/list?n=2&last=b' + + +def test_list_tags_follows_absolute_link_header(mocker: MockerFixture) -> None: + page_1 = _response( + 200, + json_body={'name': 'agent', 'tags': ['a']}, + headers={'Link': '; rel="next"'}, + ) + page_2 = _response(200, json_body={'name': 'agent', 'tags': ['b']}) + get = mocker.patch('httpx.get', side_effect=[page_1, page_2]) + + result = docker_registry.list_tags('agent') + + assert result == ['a', 'b'] + assert get.call_args_list[1].args[0] == 'https://other.example/v2/agent/tags/list?last=a' + + +def test_list_tags_stops_when_link_header_absent(mocker: MockerFixture) -> None: + """If the final page omits the Link header, iteration ends — no infinite loop.""" + page = _response(200, json_body={'name': 'agent', 'tags': ['only']}) + get = mocker.patch('httpx.get', return_value=page) + + result = docker_registry.list_tags('agent') + + assert result == ['only'] + assert get.call_count == 1 + + +def test_list_tags_ignores_non_next_link_relations(mocker: MockerFixture) -> None: + """Other rels like `last`, `prev`, `up` must not be followed as if they were `next`.""" + page = _response( + 200, + json_body={'name': 'agent', 'tags': ['only']}, + headers={'Link': '; rel="last", ; rel="prev"'}, + ) + get = mocker.patch('httpx.get', return_value=page) + + result = docker_registry.list_tags('agent') + + assert result == ['only'] + assert get.call_count == 1 + + +@pytest.mark.parametrize( + 'exc', + [ + pytest.param(httpx.ConnectError('connection refused'), id='connect-error'), + pytest.param(httpx.ReadTimeout('read timed out'), id='read-timeout'), + ], +) +def test_list_tags_propagates_network_errors(mocker: MockerFixture, exc: Exception) -> None: + mocker.patch('httpx.get', side_effect=exc) + + with pytest.raises(type(exc)): + docker_registry.list_tags('agent') diff --git a/ddev/tests/utils/test_github_async.py b/ddev/tests/utils/test_github_async.py index 259620728afa5..d8a3cc1c8dde8 100644 --- a/ddev/tests/utils/test_github_async.py +++ b/ddev/tests/utils/test_github_async.py @@ -27,6 +27,8 @@ PullRequestRef, PullRequestReviewComment, WorkflowDispatchResult, + WorkflowJob, + WorkflowJobsList, WorkflowRun, ) @@ -72,6 +74,17 @@ def _workflow_run_payload() -> dict[str, Any]: } +def _workflow_job(idx: int, *, status: str = "completed", conclusion: str | None = "success") -> dict[str, Any]: + return { + "id": idx, + "run_id": 42, + "name": f"job-{idx}", + "status": status, + "conclusion": conclusion, + "html_url": f"https://github.com/owner/repo/actions/runs/42/job/{idx}", + } + + def _issue_comment_payload() -> dict[str, Any]: return { "id": 1, @@ -196,10 +209,17 @@ def handler(request: httpx.Request) -> httpx.Response: assert body["ref"] == "main" assert body["return_run_details"] is True assert "inputs" not in body - return _json_response({"workflow_run_id": 999}, headers={"x-ratelimit-remaining": "59"}) + return _json_response( + { + "workflow_run_id": 999, + "run_url": "https://api.github.com/repos/owner/repo/actions/runs/999", + "html_url": "https://github.com/owner/repo/actions/runs/999", + }, + headers={"x-ratelimit-remaining": "59"}, + ) client = _make_client(httpx.MockTransport(handler)) - result = await client.create_workflow_dispatch("owner", "repo", "my-workflow.yml", "main") + result = await client.create_workflow_dispatch("owner", "repo", "my-workflow.yml", "main", return_run_details=True) assert isinstance(result, GitHubResponse) assert isinstance(result.data, WorkflowDispatchResult) assert result.data.workflow_run_id == 999 @@ -211,10 +231,18 @@ def handler(request: httpx.Request) -> httpx.Response: body = json.loads(request.content) assert body["inputs"] == {"env": "prod"} assert body["return_run_details"] is True - return _json_response({"workflow_run_id": 1}) + return _json_response( + { + "workflow_run_id": 1, + "run_url": "https://api.github.com/repos/o/r/actions/runs/1", + "html_url": "https://github.com/o/r/actions/runs/1", + } + ) client = _make_client(httpx.MockTransport(handler)) - result = await client.create_workflow_dispatch("o", "r", 123, "main", inputs={"env": "prod"}) + result = await client.create_workflow_dispatch( + "o", "r", 123, "main", inputs={"env": "prod"}, return_run_details=True + ) assert result.data.workflow_run_id == 1 @@ -225,6 +253,34 @@ async def test_create_workflow_dispatch_http_error_raises() -> None: assert exc_info.value.response.status_code == 422 +async def test_create_workflow_dispatch_return_run_details_parses_response() -> None: + payload = { + "workflow_run_id": 987, + "run_url": "https://api.github.com/repos/o/r/actions/runs/987", + "html_url": "https://github.com/o/r/actions/runs/987", + } + + def handler(request: httpx.Request) -> httpx.Response: + body = json.loads(request.content) + assert body["return_run_details"] is True + return _json_response(payload) + + client = _make_client(httpx.MockTransport(handler)) + result = await client.create_workflow_dispatch("o", "r", "wf.yml", "main", return_run_details=True) + assert result.data.workflow_run_id == 987 + assert result.data.html_url == "https://github.com/o/r/actions/runs/987" + + +async def test_create_workflow_dispatch_omits_return_run_details_when_false() -> None: + def handler(request: httpx.Request) -> httpx.Response: + body = json.loads(request.content) + assert "return_run_details" not in body + return httpx.Response(204) + + client = _make_client(httpx.MockTransport(handler)) + await client.create_workflow_dispatch("o", "r", "wf.yml", "main") + + # --------------------------------------------------------------------------- # get_workflow_run # --------------------------------------------------------------------------- @@ -336,6 +392,62 @@ def handler(request: httpx.Request) -> httpx.Response: pass +# --------------------------------------------------------------------------- +# list_workflow_jobs (pagination) +# --------------------------------------------------------------------------- + + +async def test_list_workflow_jobs_single_page() -> None: + jobs = [_workflow_job(1), _workflow_job(2, status="in_progress", conclusion=None)] + + def handler(request: httpx.Request) -> httpx.Response: + assert request.method == "GET" + assert "/actions/runs/42/jobs" in request.url.path + return _json_response({"total_count": 2, "jobs": jobs}) + + client = _make_client(httpx.MockTransport(handler)) + pages = [] + async for page in client.list_workflow_jobs("owner", "repo", 42): + pages.append(page) + + assert len(pages) == 1 + assert isinstance(pages[0].data, WorkflowJobsList) + assert pages[0].data.total_count == 2 + assert isinstance(pages[0].data.jobs[0], WorkflowJob) + assert pages[0].data.jobs[1].html_url == "https://github.com/owner/repo/actions/runs/42/job/2" + + +async def test_list_workflow_jobs_two_pages() -> None: + call_count = 0 + + def handler(request: httpx.Request) -> httpx.Response: + nonlocal call_count + call_count += 1 + if call_count == 1: + link = f'<{request.url.scheme}://{request.url.host}/page2>; rel="next"' + return _json_response({"total_count": 2, "jobs": [_workflow_job(1)]}, headers={"link": link}) + return _json_response({"total_count": 2, "jobs": [_workflow_job(2)]}) + + client = _make_client(httpx.MockTransport(handler)) + pages = [] + async for page in client.list_workflow_jobs("owner", "repo", 42): + pages.append(page) + + assert len(pages) == 2 + assert pages[0].data.jobs[0].id == 1 + assert pages[1].data.jobs[0].id == 2 + + +async def test_list_workflow_jobs_per_page_forwarded() -> None: + def handler(request: httpx.Request) -> httpx.Response: + assert request.url.params["per_page"] == "100" + return _json_response({"total_count": 0, "jobs": []}) + + client = _make_client(httpx.MockTransport(handler)) + async for _ in client.list_workflow_jobs("owner", "repo", 42, per_page=100): + pass + + # --------------------------------------------------------------------------- # create_issue_comment # --------------------------------------------------------------------------- diff --git a/kafka_consumer/assets/configuration/spec.yaml b/kafka_consumer/assets/configuration/spec.yaml index 2a8e6cc9b8929..45cc95e6a5364 100644 --- a/kafka_consumer/assets/configuration/spec.yaml +++ b/kafka_consumer/assets/configuration/spec.yaml @@ -470,6 +470,10 @@ files: Kafka Connect REST API URL or list of URLs. When set with enable_cluster_monitoring, collects connector metrics and configuration events from the Kafka Connect REST API. Accepts a single URL string or a list of URLs to monitor multiple Connect clusters. + For Confluent Cloud, set this to + ``https://api.confluent.cloud/connect/v1/environments//clusters/`` and supply the + Cloud API key (the key ID) as ``kafka_connect_username`` and its API secret as + ``kafka_connect_password``. fleet_configurable: true formats: - url diff --git a/kafka_consumer/changelog.d/24033.added b/kafka_consumer/changelog.d/24033.added new file mode 100644 index 0000000000000..56e647762a612 --- /dev/null +++ b/kafka_consumer/changelog.d/24033.added @@ -0,0 +1 @@ +Support monitoring Confluent Cloud managed connectors by pointing kafka_connect_url at the Confluent Cloud Connect REST API. diff --git a/kafka_consumer/changelog.d/24263.fixed b/kafka_consumer/changelog.d/24263.fixed new file mode 100644 index 0000000000000..f752742ce3afb --- /dev/null +++ b/kafka_consumer/changelog.d/24263.fixed @@ -0,0 +1 @@ +Continue collecting high-watermark offsets for healthy partitions when an individual partition's offset lookup fails, instead of aborting the check. diff --git a/kafka_consumer/datadog_checks/kafka_consumer/client.py b/kafka_consumer/datadog_checks/kafka_consumer/client.py index d26c81beed385..f4a57d8e9dee0 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/client.py +++ b/kafka_consumer/datadog_checks/kafka_consumer/client.py @@ -3,8 +3,8 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from concurrent.futures import as_completed -from confluent_kafka import Consumer, ConsumerGroupTopicPartitions, KafkaException, TopicPartition -from confluent_kafka.admin import AdminClient +from confluent_kafka import Consumer, ConsumerGroupTopicPartitions, IsolationLevel, KafkaException, TopicPartition +from confluent_kafka.admin import AdminClient, OffsetSpec # AWS MSK IAM authentication support try: @@ -147,25 +147,30 @@ def consumer_get_cluster_id_and_list_topics(self, consumer_group): return "", [] return (cluster_id, [(name, list(metadata.partitions)) for name, metadata in cluster_metadata.topics.items()]) - def consumer_offsets_for_times(self, partitions, offset=-1): - topicpartitions_for_querying = [ - # -1: latest; 0: earliest (timestamp 0) - TopicPartition(topic=topic, partition=partition, offset=offset) - for topic, partition in partitions - ] + def get_partition_offsets(self, partitions, offset=-1): + """Return (topic, partition, offset) tuples, skipping partitions that can't be queried. + + A request-level failure (the batched list_offsets call itself raising, as opposed to an + individual partition's future) is not swallowed here: it propagates so the caller aborts + highwater collection instead of silently treating every partition as missing. + """ + offset_spec = OffsetSpec.earliest() if offset == 0 else OffsetSpec.latest() + request = {TopicPartition(topic=topic, partition=partition): offset_spec for topic, partition in partitions} + if not request: + return [] + + futures = self.kafka_client.list_offsets( + request, + isolation_level=IsolationLevel.READ_UNCOMMITTED, + request_timeout=self.config._request_timeout, + ) + results = [] - for tp in self._consumer.offsets_for_times( - partitions=topicpartitions_for_querying, timeout=self.config._request_timeout - ): - if tp.error: - self.log.debug( - "Failed to get offset for topic %s partition %s: %s", - tp.topic, - tp.partition, - tp.error, - ) - continue - results.append((tp.topic, tp.partition, tp.offset)) + for tp, future in futures.items(): + try: + results.append((tp.topic, tp.partition, future.result().offset)) + except Exception as e: + self.log.debug("Skipping offsets for %s/%s: %s", tp.topic, tp.partition, e) return results def _list_topics(self): diff --git a/kafka_consumer/datadog_checks/kafka_consumer/connectors.py b/kafka_consumer/datadog_checks/kafka_consumer/connectors.py index 3ae0bd7be9762..a857fa75cb07c 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/connectors.py +++ b/kafka_consumer/datadog_checks/kafka_consumer/connectors.py @@ -138,8 +138,7 @@ def _refresh_oauth_token(self) -> None: def collect(self, cluster_id: str) -> dict[str, bool]: """Collect connector data and return connectivity status per endpoint. - Returns a mapping of endpoint identifier to connection success (True/False). - REST URL endpoints use the URL as key; Confluent Cloud uses 'confluent_cloud::'. + Returns a mapping of endpoint URL to connection success (True/False). """ if self.config._kafka_connect_oauth_token_provider: try: @@ -164,18 +163,28 @@ def collect(self, cluster_id: str) -> dict[str, bool]: return connectivity def _collect_rest(self, url: str, cluster_id: str) -> None: - response = self.http.get( - f'{url.rstrip("/")}/connectors', - params={'expand': ['info', 'status']}, - timeout=self.config._request_timeout, - **self._get_request_kwargs(), - ) - response.raise_for_status() - connectors_data = response.json() + endpoint = f'{url.rstrip("/")}/connectors' + + def _fetch(expand: str | list[str]) -> Any: + response = self.http.get( + endpoint, + params={'expand': expand}, + timeout=self.config._request_timeout, + **self._get_request_kwargs(), + ) + response.raise_for_status() + return response.json() + + connectors_data = _fetch(['info', 'status']) + + if not isinstance(connectors_data, dict): + # A combined expand list is serialized as repeated query params (expand=info&expand=status), + # which Confluent Cloud's expansions endpoint doesn't honor — it returns the unexpanded + # connector-name list instead. Retry with a single expand value before concluding the + # worker predates Kafka 2.3 / CP 5.3 and doesn't support expand at all. + connectors_data = _fetch('info') if not isinstance(connectors_data, dict): - # Older Connect workers (pre-Kafka 2.3 / CP 5.3) ignore the expand parameter - # and return a plain list of connector names instead of the expanded dict. self.log.warning( "Unexpected response shape from %s/connectors (got %s, expected dict). " "The Connect worker may not support the expand parameter — Kafka 2.3+ / CP 5.3+ is required.", @@ -184,17 +193,43 @@ def _collect_rest(self, url: str, cluster_id: str) -> None: ) return + # Confluent Cloud honors only one expand value per request; fetch any missing section separately and merge. + for section in ('info', 'status'): + if any((entry or {}).get(section) is None for entry in connectors_data.values()): + try: + self._merge_expand(connectors_data, _fetch(section), section) + except Exception as e: + self.log.warning( + "Failed to fetch supplementary '%s' section from %s/connectors, emitting partial data: %s", + section, + url, + e, + ) + tags_base = self.config._get_tags(cluster_id) + [f'connect_url:{url}'] self.check.gauge('connector.count', len(connectors_data), tags=tags_base) - self._emit_connector_metrics(connectors_data, tags_base) self._emit_connector_config_events(connectors_data, cluster_id, url) self._collect_plugins(url, cluster_id) + @staticmethod + def _merge_expand(connectors_data: dict[str, Any], expanded: Any, section: str) -> None: + if not isinstance(expanded, dict): + return + for name, entry in connectors_data.items(): + if entry is None: + continue + if entry.get(section) is None and isinstance(expanded.get(name), dict): + val = expanded[name].get(section) + if val is not None: + entry[section] = val + def _emit_connector_metrics(self, connectors_data: dict[str, Any], tags_base: list[str]) -> None: for name, data in connectors_data.items(): - info = data.get('info', {}) - status = data.get('status', {}) + if data is None: + continue + info = data.get('info') or {} + status = data.get('status') or {} connector_type = info.get('type', 'unknown') full_class = (info.get('config') or {}).get('connector.class', '') @@ -230,8 +265,10 @@ def _emit_connector_config_events(self, connectors_data: dict[str, Any], cluster connector_contents: dict[str, str] = {} for name, data in connectors_data.items(): - info = data.get('info', {}) - status = data.get('status', {}) + if data is None: + continue + info = data.get('info') or {} + status = data.get('status') or {} connector_type = info.get('type', 'unknown') connector_state = (status.get('connector') or {}).get('state', 'UNKNOWN') diff --git a/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example b/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example index 5c28c3b688e71..c58fedc9b0faa 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example +++ b/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example @@ -460,6 +460,10 @@ instances: ## Kafka Connect REST API URL or list of URLs. When set with enable_cluster_monitoring, ## collects connector metrics and configuration events from the Kafka Connect REST API. ## Accepts a single URL string or a list of URLs to monitor multiple Connect clusters. + ## For Confluent Cloud, set this to + ## ``https://api.confluent.cloud/connect/v1/environments//clusters/`` and supply the + ## Cloud API key (the key ID) as ``kafka_connect_username`` and its API secret as + ## ``kafka_connect_password``. # # kafka_connect_url: http://localhost:8083 diff --git a/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py b/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py index ad95ea27b2958..bb003b6567607 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py +++ b/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py @@ -185,7 +185,7 @@ def _send_cluster_monitoring_connection_error(self, reason: str) -> None: ) def _collect_connect_status(self, cluster_id: str) -> dict[str, bool] | None: - """Collect connector status for all configured Connect URLs, or None if unconfigured.""" + """Collect connector status for all configured Connect endpoints, or None if unconfigured.""" if not self.config._kafka_connect_urls: return None try: @@ -482,7 +482,7 @@ def get_highwater_offsets(self, partitions=None): self.log.debug('Querying %s highwater offsets', len(topic_partitions_to_check)) result = {} - for topic, partition, offset in self.client.consumer_offsets_for_times( + for topic, partition, offset in self.client.get_partition_offsets( partitions=topic_partitions_to_check, offset=HIGH_WATERMARK ): result[(topic, partition)] = offset diff --git a/kafka_consumer/tests/test_cluster_metadata.py b/kafka_consumer/tests/test_cluster_metadata.py index 085cc06865980..e4b5e3b1c3dfe 100644 --- a/kafka_consumer/tests/test_cluster_metadata.py +++ b/kafka_consumer/tests/test_cluster_metadata.py @@ -166,7 +166,7 @@ def mock_offsets_for_times(partitions, offset=-1): else: return [(topic, partition, 10 if partition == 0 else 20) for topic, partition in partitions] - client.consumer_offsets_for_times = mock_offsets_for_times + client.get_partition_offsets = mock_offsets_for_times def mock_list_offsets(requests, **_kwargs): result = {} @@ -656,7 +656,7 @@ def mock_offsets(partitions, offset=-1): else: return [(topic, partition, 10 if partition == 0 else 20) for topic, partition in partitions] - mock_kafka_client.consumer_offsets_for_times = mock_offsets + mock_kafka_client.get_partition_offsets = mock_offsets # Mock cache with previous offsets baseline_cache = { @@ -721,7 +721,7 @@ def mock_offsets_run2(partitions, offset=-1): else: return [(topic, partition, 10 if partition == 0 else 20) for topic, partition in partitions] - mock_kafka_client.consumer_offsets_for_times = mock_offsets_run2 + mock_kafka_client.get_partition_offsets = mock_offsets_run2 prev_cache = json.dumps(baseline_cache) kafka_consumer_check.read_persistent_cache = mock.Mock(return_value=prev_cache) diff --git a/kafka_consumer/tests/test_connectors.py b/kafka_consumer/tests/test_connectors.py index d9d807ce439ba..11e8251986eca 100644 --- a/kafka_consumer/tests/test_connectors.py +++ b/kafka_consumer/tests/test_connectors.py @@ -314,3 +314,118 @@ def test_connector_metrics_match_metadata(run_connect_check, aggregator): run_connect_check(connectors_response=SAMPLE_CONNECTORS_RESPONSE) aggregator.assert_metrics_using_metadata(get_metadata_metrics()) + + +# --------------------------------------------------------------------------- +# Single-expand fallback (e.g. Confluent Cloud) +# --------------------------------------------------------------------------- + +CONFLUENT_CLOUD_URL = 'https://api.confluent.cloud/connect/v1/environments/env-abc123/clusters/lkc-xyz789' + + +def test_single_expand_sections_merged(run_connect_check, aggregator): + """A Connect endpoint honoring one expand value per request still yields complete metrics.""" + info_only = { + 'my-conn': { + 'info': { + 'type': 'source', + 'config': {'connector.class': 'io.confluent.SomeSource'}, + }, + 'status': None, + } + } + status_only = { + 'my-conn': { + 'status': { + 'connector': {'state': 'RUNNING'}, + 'tasks': [{'id': 0, 'state': 'RUNNING'}], + 'type': 'source', + } + } + } + + def get(url, **kwargs): + response = mock.MagicMock() + if 'connector-plugins' in url: + response.json.return_value = [] + return response + expand = kwargs.get('params', {}).get('expand') + response.json.return_value = status_only if expand == 'status' else info_only + return response + + _, http = run_connect_check( + get_side_effect=get, + instance_extra={'kafka_connect_url': CONFLUENT_CLOUD_URL}, + ) + + aggregator.assert_metric('kafka.connector.count', value=1) + aggregator.assert_metric('kafka.connector.task.count', value=1) + aggregator.assert_metric_has_tag('kafka.connector.task.count', 'connector_state:running') + aggregator.assert_metric('kafka.connector.task.running', value=1) + + # The info section arrives via the supplementary fetch; assert it survived the merge + # by checking the config event derived from it. + events = dsm_events(aggregator, 'connector') + assert len(events) == 1 + assert events[0]['connector'] == 'my-conn' + assert events[0]['connector_type'] == 'source' + assert events[0]['connector_state'] == 'RUNNING' + assert events[0]['config']['connector.class'] == 'io.confluent.SomeSource' + + # A supplementary /connectors fetch is issued because the combined call returns a null section. + connectors_fetches = [call for call in http.get.call_args_list if 'connector-plugins' not in call.args[0]] + assert len(connectors_fetches) == 2 + + +def test_oss_combined_response_makes_single_connectors_request(run_connect_check, aggregator): + """When the combined call returns both sections, no supplementary fetch is issued.""" + _, http = run_connect_check(connectors_response=SAMPLE_CONNECTORS_RESPONSE) + + connectors_fetches = [call for call in http.get.call_args_list if 'connector-plugins' not in call.args[0]] + assert len(connectors_fetches) == 1 + + +def test_combined_expand_list_ignored_falls_back_to_single_expand(run_connect_check, aggregator): + """A combined ``expand`` list is serialized as repeated query params, which Confluent Cloud's + expansions endpoint doesn't honor — it returns the plain connector-name list instead of the + expanded dict. The collector must retry with a single expand value rather than treating that + response as an unsupported (pre-2.3) worker. + """ + name_list = ['my-conn'] + info_only = {'my-conn': {'info': {'type': 'source', 'config': {'connector.class': 'io.confluent.SomeSource'}}}} + status_only = { + 'my-conn': { + 'status': {'connector': {'state': 'RUNNING'}, 'tasks': [{'id': 0, 'state': 'RUNNING'}], 'type': 'source'} + } + } + + def get(url, **kwargs): + response = mock.MagicMock() + if 'connector-plugins' in url: + response.json.return_value = [] + return response + expand = kwargs.get('params', {}).get('expand') + if expand == 'status': + response.json.return_value = status_only + elif expand == 'info': + response.json.return_value = info_only + else: + response.json.return_value = name_list + return response + + _, http = run_connect_check( + get_side_effect=get, + instance_extra={'kafka_connect_url': CONFLUENT_CLOUD_URL}, + ) + + aggregator.assert_metric('kafka.connector.count', value=1) + aggregator.assert_metric('kafka.connector.task.count', value=1) + aggregator.assert_metric_has_tag('kafka.connector.task.count', 'connector_state:running') + + events = dsm_events(aggregator, 'connector') + assert len(events) == 1 + assert events[0]['connector'] == 'my-conn' + assert events[0]['config']['connector.class'] == 'io.confluent.SomeSource' + + connectors_fetches = [call for call in http.get.call_args_list if 'connector-plugins' not in call.args[0]] + assert len(connectors_fetches) == 3 diff --git a/kafka_consumer/tests/test_unit.py b/kafka_consumer/tests/test_unit.py index 96082a8a51e24..ce025afe1aa30 100644 --- a/kafka_consumer/tests/test_unit.py +++ b/kafka_consumer/tests/test_unit.py @@ -14,7 +14,7 @@ pytestmark = [pytest.mark.unit] -def fake_consumer_offsets_for_times(partitions, offset=-1): +def fake_get_partition_offsets(partitions, offset=-1): """In our testing environment the offset is 80 for all partitions and topics.""" return [(t, p, 80) for t, p in partitions] @@ -41,7 +41,7 @@ def seed_mock_client(cluster_id="cluster_id"): ('__consumer_offsets', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ], ) - client.consumer_offsets_for_times = fake_consumer_offsets_for_times + client.get_partition_offsets = fake_get_partition_offsets return client @@ -500,7 +500,7 @@ def test_check_clears_cache_on_partial_reset(kafka_instance, check, dd_run_check mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 30)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 100)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 100)] kafka_consumer_check.client = mock_client # Cache has entries below (5, 50) and above (200) the new highwater of 100 — all must be cleared. @@ -585,7 +585,7 @@ def test_check_compacts_timestamps_and_preserves_lag_accuracy( mock_client = seed_mock_client() mock_client.get_partitions_for_topic.return_value = [0] mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, consumer_offset)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, highwater_offset)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, highwater_offset)] kafka_consumer_check.client = mock_client kafka_consumer_check.read_persistent_cache = mock.Mock(return_value=json.dumps(initial_cache)) @@ -609,7 +609,7 @@ def test_check_prunes_timestamps_below_earliest_consumer_offset(kafka_instance, mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 25)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client # Pre-seed the cache with 3 entries. Adding the new highwater at 40 fills the 4-entry @@ -747,7 +747,7 @@ def test_check_prunes_floor_uses_minimum_offset_across_groups(kafka_instance, ch ("group1", [("topic1", 0, 25)]), ("group2", [("topic1", 0, 5)]), ] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client # With floor=5 (correct min), nothing below 5 exists, so no pruning; VW keeps {10, 40}. @@ -776,7 +776,7 @@ def test_check_prunes_anchor_at_floor_boundary(kafka_instance, check, dd_run_che mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 30)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client initial_cache = {"topic1_0": {"10": 1.0, "20": 2.0, "30": 3.0}} @@ -802,7 +802,7 @@ def test_check_keeps_sole_entry_below_floor_as_anchor(kafka_instance, check, dd_ mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 25)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client initial_cache = {"topic1_0": {"20": 2.0, "30": 3.0}} @@ -1054,3 +1054,113 @@ def test_connection_error_sink_failure_does_not_mask_broker_error(check, kafka_i kafka_consumer_check.event_platform_event = mock.Mock(side_effect=Exception('intake unavailable')) with pytest.raises(Exception, match="Unable to connect to the AdminClient"): dd_run_check(kafka_consumer_check) + + +def _offset_future(offset): + """Build a list_offsets future whose result() returns an object with the given offset.""" + future = mock.MagicMock() + future.result.return_value = mock.MagicMock(offset=offset) + return future + + +def _raising_future(exc): + """Build a list_offsets future whose result() raises the given exception.""" + future = mock.MagicMock() + future.result.side_effect = exc + return future + + +def test_get_partition_offsets_skips_unqueryable_partitions(): + """A partition whose list_offsets future raises is skipped; healthy partitions are still returned.""" + from confluent_kafka import KafkaException, TopicPartition + + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + + futures = { + TopicPartition(topic="healthy_topic", partition=0): _offset_future(100), + TopicPartition(topic="bad_topic", partition=0): _raising_future(KafkaException("UNKNOWN_TOPIC_OR_PART")), + } + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.return_value = futures + + results = client.get_partition_offsets([("healthy_topic", 0), ("bad_topic", 0)]) + + # (a) no exception escaped, (b) the healthy partition's offset is returned, + # (c) the unqueryable partition is skipped. + assert results == [("healthy_topic", 0, 100)] + + +def test_get_partition_offsets_skips_partition_on_non_kafka_error(): + """A non-Kafka error on one partition's future is skipped, not propagated, so the loop survives.""" + from confluent_kafka import TopicPartition + + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + + futures = { + TopicPartition(topic="healthy_topic", partition=0): _offset_future(100), + TopicPartition(topic="bad_topic", partition=0): _raising_future(RuntimeError("unexpected")), + } + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.return_value = futures + + results = client.get_partition_offsets([("healthy_topic", 0), ("bad_topic", 0)]) + + assert results == [("healthy_topic", 0, 100)] + + +def test_get_partition_offsets_raises_when_list_offsets_request_fails(): + """A request/broker-level list_offsets failure propagates, aborting highwater collection.""" + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.side_effect = RuntimeError("connection dropped") + + with pytest.raises(RuntimeError): + client.get_partition_offsets([("topic_a", 0)]) + + +def test_get_partition_offsets_empty_partitions_returns_empty_without_request(): + """No partitions means no list_offsets request is issued and an empty result is returned.""" + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + client._kafka_client = mock.MagicMock() + + results = client.get_partition_offsets([]) + + assert results == [] + assert client._kafka_client.list_offsets.call_count == 0 + + +def test_get_partition_offsets_returns_all_healthy_partitions(): + """When every list_offsets future succeeds, all partition offsets are returned.""" + from confluent_kafka import IsolationLevel, TopicPartition + + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + + futures = { + TopicPartition(topic="topic_a", partition=0): _offset_future(42), + TopicPartition(topic="topic_b", partition=1): _offset_future(7), + } + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.return_value = futures + + results = client.get_partition_offsets([("topic_a", 0), ("topic_b", 1)]) + + assert sorted(results) == [("topic_a", 0, 42), ("topic_b", 1, 7)] + assert client._kafka_client.list_offsets.call_count == 1 + # READ_UNCOMMITTED is load-bearing: READ_COMMITTED would return the LSO, not the true high watermark. + assert client._kafka_client.list_offsets.call_args.kwargs["isolation_level"] == IsolationLevel.READ_UNCOMMITTED + assert client._kafka_client.list_offsets.call_args.kwargs["request_timeout"] == 5 diff --git a/nutanix/assets/configuration/spec.yaml b/nutanix/assets/configuration/spec.yaml index 1f1fefbf5170b..93b752ccd95fd 100644 --- a/nutanix/assets/configuration/spec.yaml +++ b/nutanix/assets/configuration/spec.yaml @@ -165,6 +165,17 @@ files: type: boolean example: false fleet_configurable: false + - name: collect_resource_ids_as_tags + description: | + Whether to collect Nutanix resource extId values as tags on infrastructure metrics. + When true, cluster metrics get 'ntnx_cluster_id', host metrics get 'ntnx_host_id', + and VM metrics get 'ntnx_vm_id', 'ntnx_host_id', and 'ntnx_cluster_id'. + These tags are high cardinality (one unique value per resource). + Default is false. + value: + type: boolean + example: false + fleet_configurable: false - name: collect_events description: | Whether to collect events from Prism Central. diff --git a/nutanix/changelog.d/24303.added b/nutanix/changelog.d/24303.added new file mode 100644 index 0000000000000..328a1477a7048 --- /dev/null +++ b/nutanix/changelog.d/24303.added @@ -0,0 +1 @@ +Add the ``collect_resource_ids_as_tags`` option to tag infrastructure metrics with Nutanix resource extIds. diff --git a/nutanix/datadog_checks/nutanix/config_models/defaults.py b/nutanix/datadog_checks/nutanix/config_models/defaults.py index cc2c4b58209ff..6b795908fd07f 100644 --- a/nutanix/datadog_checks/nutanix/config_models/defaults.py +++ b/nutanix/datadog_checks/nutanix/config_models/defaults.py @@ -32,6 +32,10 @@ def instance_collect_events(): return True +def instance_collect_resource_ids_as_tags(): + return False + + def instance_collect_subtasks(): return False diff --git a/nutanix/datadog_checks/nutanix/config_models/instance.py b/nutanix/datadog_checks/nutanix/config_models/instance.py index 1ecfd17b5bbc3..287270d34f2bb 100644 --- a/nutanix/datadog_checks/nutanix/config_models/instance.py +++ b/nutanix/datadog_checks/nutanix/config_models/instance.py @@ -81,6 +81,7 @@ class InstanceConfig(BaseModel): collect_alerts: Optional[bool] = None collect_audits: Optional[bool] = None collect_events: Optional[bool] = None + collect_resource_ids_as_tags: Optional[bool] = None collect_subtasks: Optional[bool] = None collect_tasks: Optional[bool] = None connect_timeout: Optional[float] = None diff --git a/nutanix/datadog_checks/nutanix/data/conf.yaml.example b/nutanix/datadog_checks/nutanix/data/conf.yaml.example index c4cb4cfcd3c34..a25794ee84165 100644 --- a/nutanix/datadog_checks/nutanix/data/conf.yaml.example +++ b/nutanix/datadog_checks/nutanix/data/conf.yaml.example @@ -136,6 +136,15 @@ instances: # # prefix_category_tags: false + ## @param collect_resource_ids_as_tags - boolean - optional - default: false + ## Whether to collect Nutanix resource extId values as tags on infrastructure metrics. + ## When true, cluster metrics get 'ntnx_cluster_id', host metrics get 'ntnx_host_id', + ## and VM metrics get 'ntnx_vm_id', 'ntnx_host_id', and 'ntnx_cluster_id'. + ## These tags are high cardinality (one unique value per resource). + ## Default is false. + # + # collect_resource_ids_as_tags: false + ## @param collect_events - boolean - optional - default: true ## Whether to collect events from Prism Central. ## Default is true. diff --git a/nutanix/datadog_checks/nutanix/infrastructure_monitor.py b/nutanix/datadog_checks/nutanix/infrastructure_monitor.py index 0ea0b49c64109..e39694f0c1949 100644 --- a/nutanix/datadog_checks/nutanix/infrastructure_monitor.py +++ b/nutanix/datadog_checks/nutanix/infrastructure_monitor.py @@ -512,6 +512,7 @@ def _report_host_status_metrics(self, host: dict, hostname: str, host_tags: list def _extract_host_tags(self, host: dict) -> list[str]: """Extract tags from a host object.""" + host_id = host.get("extId") host_name = host.get("hostName") host_type = _normalize_tag_value(host.get("hostType")) maintenance_state = _normalize_tag_value(host.get("maintenanceState")) @@ -521,6 +522,8 @@ def _extract_host_tags(self, host: dict) -> list[str]: tags = [] tags.append("ntnx_type:host") + if self.check.config.collect_resource_ids_as_tags and host_id: + tags.append(f"ntnx_host_id:{host_id}") if host_name: tags.append(f"ntnx_host_name:{host_name}") tags.append(f"ntnx_host_type:{host_type}") @@ -535,10 +538,13 @@ def _extract_host_tags(self, host: dict) -> list[str]: def _extract_cluster_tags(self, cluster: dict) -> list[str]: """Extract tags from a cluster object.""" + cluster_id = cluster.get("extId") cluster_name = cluster.get("name") operation_mode = _normalize_tag_value(get_nested(cluster, "config/operationMode")) tags = [] + if self.check.config.collect_resource_ids_as_tags and cluster_id: + tags.append(f"ntnx_cluster_id:{cluster_id}") if cluster_name: tags.append(f"ntnx_cluster_name:{cluster_name}") tags.append(f"ntnx_operation_mode:{operation_mode}") @@ -554,13 +560,21 @@ def _extract_vm_tags(self, vm: dict) -> list[str]: is_agent_vm = is_affirmative(vm.get("isAgentVm")) power_state = _normalize_tag_value(vm.get("powerState")) + collect_ids = self.check.config.collect_resource_ids_as_tags + tags = [] tags.append("ntnx_type:vm") + if collect_ids and (vm_id := vm.get("extId")): + tags.append(f"ntnx_vm_id:{vm_id}") if vm_name: tags.append(f"ntnx_vm_name:{vm_name}") tags.extend(self.check.extract_category_tags(vm)) + if collect_ids and host_id: + tags.append(f"ntnx_host_id:{host_id}") if host_id and host_id in self.host_names: tags.append(f"ntnx_host_name:{self.host_names[host_id]}") + if collect_ids and cluster_id: + tags.append(f"ntnx_cluster_id:{cluster_id}") if cluster_id and cluster_id in self.cluster_names: tags.append(f"ntnx_cluster_name:{self.cluster_names[cluster_id]}") tags.append(f"ntnx_is_agent_vm:{is_agent_vm}") diff --git a/nutanix/tests/test_configuration.py b/nutanix/tests/test_configuration.py index 0f75350626ed9..59cc644402671 100644 --- a/nutanix/tests/test_configuration.py +++ b/nutanix/tests/test_configuration.py @@ -6,6 +6,7 @@ import pytest from datadog_checks.nutanix import NutanixCheck +from tests.constants import HOST_NAME, HOST_TAGS, UBUNTU_VM_NAME, UBUNTU_VM_TAGS pytestmark = [pytest.mark.unit] @@ -112,6 +113,63 @@ def test_category_tags_with_prefix_for_system_and_user_types(dd_run_check, aggre assert not unprefixed_tags, f"Category tags must have ntnx_ prefix, found without: {unprefixed_tags}" +RESOURCE_ID_METRICS = ("nutanix.cluster.count", "nutanix.host.count", "nutanix.vm.count") + +# extId values of the cluster, host, and ubuntu-vm served by mock_http_get (see conftest.py). +CLUSTER_ID = "00064715-c043-5d8f-ee4b-176ec875554d" +HOST_ID = "d8787814-4fe8-4ba5-931f-e1ee31c294a6" +UBUNTU_VM_ID = "7b9d5b24-b99a-4c62-516c-0fe0c20411dd" + + +def test_resource_ids_not_collected_by_default(dd_run_check, aggregator, mock_instance, mock_http_get, datadog_agent): + check = NutanixCheck('nutanix', {}, [mock_instance]) + dd_run_check(check) + + for metric_name in RESOURCE_ID_METRICS: + assert aggregator.metrics(metric_name), f"{metric_name} was not emitted" + for metric in aggregator.metrics(metric_name): + id_tags = [t for t in metric.tags if t.startswith(("ntnx_cluster_id:", "ntnx_host_id:", "ntnx_vm_id:"))] + assert not id_tags, f"{metric_name} should not have resource id tags by default, found: {id_tags}" + + # Id tags must not leak into the separate external-tags submission path either. + datadog_agent.assert_external_tags(HOST_NAME, {'nutanix': HOST_TAGS}) + datadog_agent.assert_external_tags(UBUNTU_VM_NAME, {'nutanix': UBUNTU_VM_TAGS}) + + +def test_resource_ids_collected_when_enabled(dd_run_check, aggregator, mock_instance, mock_http_get, datadog_agent): + instance = mock_instance.copy() + instance['collect_resource_ids_as_tags'] = True + + check = NutanixCheck('nutanix', {}, [instance]) + dd_run_check(check) + + aggregator.assert_metric_has_tag("nutanix.cluster.count", f"ntnx_cluster_id:{CLUSTER_ID}") + + aggregator.assert_metric_has_tag("nutanix.host.count", f"ntnx_host_id:{HOST_ID}") + aggregator.assert_metric_has_tag("nutanix.host.count", f"ntnx_cluster_id:{CLUSTER_ID}") + + aggregator.assert_metric_has_tag("nutanix.vm.count", f"ntnx_vm_id:{UBUNTU_VM_ID}") + aggregator.assert_metric_has_tag("nutanix.vm.count", f"ntnx_host_id:{HOST_ID}") + aggregator.assert_metric_has_tag("nutanix.vm.count", f"ntnx_cluster_id:{CLUSTER_ID}") + + # Id tags ride the shared tag list onto non-count metrics, not just the *.count gauges. + aggregator.assert_metric_has_tag("nutanix.host.status", f"ntnx_host_id:{HOST_ID}") + aggregator.assert_metric_has_tag("nutanix.vm.status", f"ntnx_vm_id:{UBUNTU_VM_ID}") + + # Id tags also propagate to host-level external tags (separate submission path). + datadog_agent.assert_external_tags( + HOST_NAME, + {'nutanix': HOST_TAGS + [f"ntnx_cluster_id:{CLUSTER_ID}", f"ntnx_host_id:{HOST_ID}"]}, + ) + datadog_agent.assert_external_tags( + UBUNTU_VM_NAME, + { + 'nutanix': UBUNTU_VM_TAGS + + [f"ntnx_vm_id:{UBUNTU_VM_ID}", f"ntnx_host_id:{HOST_ID}", f"ntnx_cluster_id:{CLUSTER_ID}"] + }, + ) + + def test_invalid_hostname_transform_raises_error(dd_run_check, mock_instance): instance = mock_instance.copy() instance['hostname_transform'] = 'INVALID'