From 6eacfb6887a48841a3afc6da21f13b4e09db4cb9 Mon Sep 17 00:00:00 2001 From: NouemanKHAL Date: Fri, 3 Jul 2026 11:29:08 +0200 Subject: [PATCH 1/6] Add option to collect Nutanix resource IDs as tags (#24303) * feat(nutanix): add collect_resource_ids_as_tags option Adds an opt-in instance option that attaches Nutanix resource extId values as tags on cluster, host, and VM infrastructure metrics, so Datadog metrics can be correlated with specific Nutanix resources. Defaults to false because the tags are high cardinality (one unique value per resource). Co-Authored-By: Claude Opus 4.8 * docs(nutanix): add changelog entry for #24303 Co-Authored-By: Claude Opus 4.8 --------- Co-authored-by: Claude Opus 4.8 --- nutanix/assets/configuration/spec.yaml | 11 ++++ nutanix/changelog.d/24303.added | 1 + .../nutanix/config_models/defaults.py | 4 ++ .../nutanix/config_models/instance.py | 1 + .../nutanix/data/conf.yaml.example | 9 +++ .../nutanix/infrastructure_monitor.py | 14 +++++ nutanix/tests/test_configuration.py | 58 +++++++++++++++++++ 7 files changed, 98 insertions(+) create mode 100644 nutanix/changelog.d/24303.added diff --git a/nutanix/assets/configuration/spec.yaml b/nutanix/assets/configuration/spec.yaml index 1f1fefbf5170b..93b752ccd95fd 100644 --- a/nutanix/assets/configuration/spec.yaml +++ b/nutanix/assets/configuration/spec.yaml @@ -165,6 +165,17 @@ files: type: boolean example: false fleet_configurable: false + - name: collect_resource_ids_as_tags + description: | + Whether to collect Nutanix resource extId values as tags on infrastructure metrics. + When true, cluster metrics get 'ntnx_cluster_id', host metrics get 'ntnx_host_id', + and VM metrics get 'ntnx_vm_id', 'ntnx_host_id', and 'ntnx_cluster_id'. + These tags are high cardinality (one unique value per resource). + Default is false. + value: + type: boolean + example: false + fleet_configurable: false - name: collect_events description: | Whether to collect events from Prism Central. diff --git a/nutanix/changelog.d/24303.added b/nutanix/changelog.d/24303.added new file mode 100644 index 0000000000000..328a1477a7048 --- /dev/null +++ b/nutanix/changelog.d/24303.added @@ -0,0 +1 @@ +Add the ``collect_resource_ids_as_tags`` option to tag infrastructure metrics with Nutanix resource extIds. diff --git a/nutanix/datadog_checks/nutanix/config_models/defaults.py b/nutanix/datadog_checks/nutanix/config_models/defaults.py index cc2c4b58209ff..6b795908fd07f 100644 --- a/nutanix/datadog_checks/nutanix/config_models/defaults.py +++ b/nutanix/datadog_checks/nutanix/config_models/defaults.py @@ -32,6 +32,10 @@ def instance_collect_events(): return True +def instance_collect_resource_ids_as_tags(): + return False + + def instance_collect_subtasks(): return False diff --git a/nutanix/datadog_checks/nutanix/config_models/instance.py b/nutanix/datadog_checks/nutanix/config_models/instance.py index 1ecfd17b5bbc3..287270d34f2bb 100644 --- a/nutanix/datadog_checks/nutanix/config_models/instance.py +++ b/nutanix/datadog_checks/nutanix/config_models/instance.py @@ -81,6 +81,7 @@ class InstanceConfig(BaseModel): collect_alerts: Optional[bool] = None collect_audits: Optional[bool] = None collect_events: Optional[bool] = None + collect_resource_ids_as_tags: Optional[bool] = None collect_subtasks: Optional[bool] = None collect_tasks: Optional[bool] = None connect_timeout: Optional[float] = None diff --git a/nutanix/datadog_checks/nutanix/data/conf.yaml.example b/nutanix/datadog_checks/nutanix/data/conf.yaml.example index c4cb4cfcd3c34..a25794ee84165 100644 --- a/nutanix/datadog_checks/nutanix/data/conf.yaml.example +++ b/nutanix/datadog_checks/nutanix/data/conf.yaml.example @@ -136,6 +136,15 @@ instances: # # prefix_category_tags: false + ## @param collect_resource_ids_as_tags - boolean - optional - default: false + ## Whether to collect Nutanix resource extId values as tags on infrastructure metrics. + ## When true, cluster metrics get 'ntnx_cluster_id', host metrics get 'ntnx_host_id', + ## and VM metrics get 'ntnx_vm_id', 'ntnx_host_id', and 'ntnx_cluster_id'. + ## These tags are high cardinality (one unique value per resource). + ## Default is false. + # + # collect_resource_ids_as_tags: false + ## @param collect_events - boolean - optional - default: true ## Whether to collect events from Prism Central. ## Default is true. diff --git a/nutanix/datadog_checks/nutanix/infrastructure_monitor.py b/nutanix/datadog_checks/nutanix/infrastructure_monitor.py index 0ea0b49c64109..e39694f0c1949 100644 --- a/nutanix/datadog_checks/nutanix/infrastructure_monitor.py +++ b/nutanix/datadog_checks/nutanix/infrastructure_monitor.py @@ -512,6 +512,7 @@ def _report_host_status_metrics(self, host: dict, hostname: str, host_tags: list def _extract_host_tags(self, host: dict) -> list[str]: """Extract tags from a host object.""" + host_id = host.get("extId") host_name = host.get("hostName") host_type = _normalize_tag_value(host.get("hostType")) maintenance_state = _normalize_tag_value(host.get("maintenanceState")) @@ -521,6 +522,8 @@ def _extract_host_tags(self, host: dict) -> list[str]: tags = [] tags.append("ntnx_type:host") + if self.check.config.collect_resource_ids_as_tags and host_id: + tags.append(f"ntnx_host_id:{host_id}") if host_name: tags.append(f"ntnx_host_name:{host_name}") tags.append(f"ntnx_host_type:{host_type}") @@ -535,10 +538,13 @@ def _extract_host_tags(self, host: dict) -> list[str]: def _extract_cluster_tags(self, cluster: dict) -> list[str]: """Extract tags from a cluster object.""" + cluster_id = cluster.get("extId") cluster_name = cluster.get("name") operation_mode = _normalize_tag_value(get_nested(cluster, "config/operationMode")) tags = [] + if self.check.config.collect_resource_ids_as_tags and cluster_id: + tags.append(f"ntnx_cluster_id:{cluster_id}") if cluster_name: tags.append(f"ntnx_cluster_name:{cluster_name}") tags.append(f"ntnx_operation_mode:{operation_mode}") @@ -554,13 +560,21 @@ def _extract_vm_tags(self, vm: dict) -> list[str]: is_agent_vm = is_affirmative(vm.get("isAgentVm")) power_state = _normalize_tag_value(vm.get("powerState")) + collect_ids = self.check.config.collect_resource_ids_as_tags + tags = [] tags.append("ntnx_type:vm") + if collect_ids and (vm_id := vm.get("extId")): + tags.append(f"ntnx_vm_id:{vm_id}") if vm_name: tags.append(f"ntnx_vm_name:{vm_name}") tags.extend(self.check.extract_category_tags(vm)) + if collect_ids and host_id: + tags.append(f"ntnx_host_id:{host_id}") if host_id and host_id in self.host_names: tags.append(f"ntnx_host_name:{self.host_names[host_id]}") + if collect_ids and cluster_id: + tags.append(f"ntnx_cluster_id:{cluster_id}") if cluster_id and cluster_id in self.cluster_names: tags.append(f"ntnx_cluster_name:{self.cluster_names[cluster_id]}") tags.append(f"ntnx_is_agent_vm:{is_agent_vm}") diff --git a/nutanix/tests/test_configuration.py b/nutanix/tests/test_configuration.py index 0f75350626ed9..59cc644402671 100644 --- a/nutanix/tests/test_configuration.py +++ b/nutanix/tests/test_configuration.py @@ -6,6 +6,7 @@ import pytest from datadog_checks.nutanix import NutanixCheck +from tests.constants import HOST_NAME, HOST_TAGS, UBUNTU_VM_NAME, UBUNTU_VM_TAGS pytestmark = [pytest.mark.unit] @@ -112,6 +113,63 @@ def test_category_tags_with_prefix_for_system_and_user_types(dd_run_check, aggre assert not unprefixed_tags, f"Category tags must have ntnx_ prefix, found without: {unprefixed_tags}" +RESOURCE_ID_METRICS = ("nutanix.cluster.count", "nutanix.host.count", "nutanix.vm.count") + +# extId values of the cluster, host, and ubuntu-vm served by mock_http_get (see conftest.py). +CLUSTER_ID = "00064715-c043-5d8f-ee4b-176ec875554d" +HOST_ID = "d8787814-4fe8-4ba5-931f-e1ee31c294a6" +UBUNTU_VM_ID = "7b9d5b24-b99a-4c62-516c-0fe0c20411dd" + + +def test_resource_ids_not_collected_by_default(dd_run_check, aggregator, mock_instance, mock_http_get, datadog_agent): + check = NutanixCheck('nutanix', {}, [mock_instance]) + dd_run_check(check) + + for metric_name in RESOURCE_ID_METRICS: + assert aggregator.metrics(metric_name), f"{metric_name} was not emitted" + for metric in aggregator.metrics(metric_name): + id_tags = [t for t in metric.tags if t.startswith(("ntnx_cluster_id:", "ntnx_host_id:", "ntnx_vm_id:"))] + assert not id_tags, f"{metric_name} should not have resource id tags by default, found: {id_tags}" + + # Id tags must not leak into the separate external-tags submission path either. + datadog_agent.assert_external_tags(HOST_NAME, {'nutanix': HOST_TAGS}) + datadog_agent.assert_external_tags(UBUNTU_VM_NAME, {'nutanix': UBUNTU_VM_TAGS}) + + +def test_resource_ids_collected_when_enabled(dd_run_check, aggregator, mock_instance, mock_http_get, datadog_agent): + instance = mock_instance.copy() + instance['collect_resource_ids_as_tags'] = True + + check = NutanixCheck('nutanix', {}, [instance]) + dd_run_check(check) + + aggregator.assert_metric_has_tag("nutanix.cluster.count", f"ntnx_cluster_id:{CLUSTER_ID}") + + aggregator.assert_metric_has_tag("nutanix.host.count", f"ntnx_host_id:{HOST_ID}") + aggregator.assert_metric_has_tag("nutanix.host.count", f"ntnx_cluster_id:{CLUSTER_ID}") + + aggregator.assert_metric_has_tag("nutanix.vm.count", f"ntnx_vm_id:{UBUNTU_VM_ID}") + aggregator.assert_metric_has_tag("nutanix.vm.count", f"ntnx_host_id:{HOST_ID}") + aggregator.assert_metric_has_tag("nutanix.vm.count", f"ntnx_cluster_id:{CLUSTER_ID}") + + # Id tags ride the shared tag list onto non-count metrics, not just the *.count gauges. + aggregator.assert_metric_has_tag("nutanix.host.status", f"ntnx_host_id:{HOST_ID}") + aggregator.assert_metric_has_tag("nutanix.vm.status", f"ntnx_vm_id:{UBUNTU_VM_ID}") + + # Id tags also propagate to host-level external tags (separate submission path). + datadog_agent.assert_external_tags( + HOST_NAME, + {'nutanix': HOST_TAGS + [f"ntnx_cluster_id:{CLUSTER_ID}", f"ntnx_host_id:{HOST_ID}"]}, + ) + datadog_agent.assert_external_tags( + UBUNTU_VM_NAME, + { + 'nutanix': UBUNTU_VM_TAGS + + [f"ntnx_vm_id:{UBUNTU_VM_ID}", f"ntnx_host_id:{HOST_ID}", f"ntnx_cluster_id:{CLUSTER_ID}"] + }, + ) + + def test_invalid_hostname_transform_raises_error(dd_run_check, mock_instance): instance = mock_instance.copy() instance['hostname_transform'] = 'INVALID' From 2c25c0cecbda084905416d12a46b24b1ad09dd3e Mon Sep 17 00:00:00 2001 From: HadhemiDD <43783545+HadhemiDD@users.noreply.github.com> Date: Fri, 3 Jul 2026 12:36:33 +0200 Subject: [PATCH 2/6] Bundle per-job correlation into BatchFinished via BatchJobResult (#24253) * Bundle per-job correlation into BatchFinished via BatchJobResult Move the job spec / workflow-run / artifact-directory correlation out of the gatherer and into the TaskTestRunner producer, shipping it as a single enriched per-job BatchJobResult on BatchFinished. - BatchJob.artifact_name(): deterministic, collision-free, sanitized artifact name derived solely from the job's frozen fields. Replaces the gatherer's delimiter-bounded token match as the way a job's artifact directory is resolved. - BatchJobResult bundles the BatchJob spec, the matched WorkflowJob (or None), and the resolved artifact path (or None). - BatchFinished now carries batch_jobs; the runner joins each job to its workflow job by name and resolves its artifact directory by artifact name, tolerating missing API matches and missing artifacts. Co-Authored-By: Claude Opus 4.8 * updates * updates * updates * updates * Address review comments: BatchJobResult.correlate factory and focused tests - Move correlation logic from TaskTestRunner._build_batch_jobs into a BatchJobResult.correlate static factory so it is testable without the processor. - _download_artifacts returns only the artifact-name -> path map; artifacts_path is taken from the runner options. - test_messages.py: explicit-args batch_job factory, drop the tautological determinism test, add direct correlate tests. - test_task_test_runner.py: drop underscore-prefixed helpers and split the monolithic happy-path test into one-concern-per-test cases via a shared helper. Co-Authored-By: Claude Opus 4.8 * Add changelog entry Co-Authored-By: Claude Opus 4.8 * Change changelog entry type to fixed Co-Authored-By: Claude Opus 4.8 --------- Co-authored-by: Claude Opus 4.8 --- ddev/changelog.d/24253.fixed | 1 + ddev/src/ddev/cli/ci/tests/messages.py | 79 ++++- .../src/ddev/cli/ci/tests/task_test_runner.py | 45 ++- ddev/src/ddev/utils/github_async/client.py | 29 ++ .../utils/github_async/models/__init__.py | 6 + .../utils/github_async/models/workflow.py | 36 +- ddev/tests/cli/ci/tests/test_messages.py | 96 ++++++ .../cli/ci/tests/test_task_test_runner.py | 323 +++++++++++++----- ddev/tests/helpers/github_async.py | 38 +++ 9 files changed, 552 insertions(+), 101 deletions(-) create mode 100644 ddev/changelog.d/24253.fixed create mode 100644 ddev/tests/cli/ci/tests/test_messages.py diff --git a/ddev/changelog.d/24253.fixed b/ddev/changelog.d/24253.fixed new file mode 100644 index 0000000000000..ee22e145029e0 --- /dev/null +++ b/ddev/changelog.d/24253.fixed @@ -0,0 +1 @@ +Bundle per-job correlation into the CI dispatcher's BatchFinished via BatchJobResult. diff --git a/ddev/src/ddev/cli/ci/tests/messages.py b/ddev/src/ddev/cli/ci/tests/messages.py index 291c185b4c035..574b828a3a8fa 100644 --- a/ddev/src/ddev/cli/ci/tests/messages.py +++ b/ddev/src/ddev/cli/ci/tests/messages.py @@ -3,11 +3,23 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from __future__ import annotations -from dataclasses import dataclass -from typing import Literal +import re +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Literal from ddev.event_bus.orchestrator import BaseMessage +if TYPE_CHECKING: + from pathlib import Path + + from ddev.utils.github_async.models import WorkflowJob + +# Characters GitHub disallows in an artifact name (plus CR/LF). +ARTIFACT_NAME_DISALLOWED = re.compile(r'["\:<>|*?\\/\r\n]') +# Separator between the artifact name's fields. Names are matched by reconstruction, not by +# splitting, so the separator does not need to be absent from the field values. +ARTIFACT_NAME_SEPARATOR = "_" + @dataclass class BatchJob: @@ -21,6 +33,16 @@ class BatchJob: unit_tests: bool e2e_tests: bool + def artifact_name(self) -> str: + """Deterministic artifact name built from the job's target, environment, and platform. + + Pure and deterministic. Each field is sanitized to GitHub's artifact-name constraints and + joined by the separator. Uniqueness within a batch relies on those three fields being + distinct per job. + """ + fields = (self.target, self.environment, self.platform) + return ARTIFACT_NAME_SEPARATOR.join(ARTIFACT_NAME_DISALLOWED.sub("_", field) for field in fields) + @dataclass class FailedCheck: @@ -30,6 +52,57 @@ class FailedCheck: url: str +@dataclass +class BatchJobResult: + """Everything known about a single job in a finished batch, correlated by the producer. + + ``artifact_name_path`` is the single downloaded folder for the job (named after the job's + ``artifact_name``); the three ``*_artifact_name`` fields are the expected per-facet file names + inside that folder. + """ + + job: BatchJob + workflow_job: WorkflowJob | None + artifact_name_path: str | None + unit_artifact_name: str + e2e_artifact_name: str + coverage_artifact_name: str + + @staticmethod + def correlate( + job_list: list[BatchJob], + jobs: list[WorkflowJob], + artifact_dirs: dict[str, Path], + ) -> list[BatchJobResult]: + """Correlate each job's spec, its workflow-run result, and its artifact directory. + + The workflow-job join is by name (tolerant of misses). Each job's artifact folder is matched + by reconstructing its name from the job's fields (``artifact_name``) and looking it up among + the downloaded folders; the path is recorded only when it exists on disk. That single folder + holds the three per-facet files, whose names (``unit-``/``e2e-``/``coverage-`` prefixed on + the base name) are recorded for the gatherer. A job missing from the API or from disk still + yields a well-formed result. + """ + jobs_by_name = {job.name: job for job in jobs} + + results: list[BatchJobResult] = [] + for batch_job in job_list: + base = batch_job.artifact_name() + artifact_dir = artifact_dirs.get(base) + artifact_name_path = str(artifact_dir) if artifact_dir is not None and artifact_dir.exists() else None + results.append( + BatchJobResult( + job=batch_job, + workflow_job=jobs_by_name.get(batch_job.name), + artifact_name_path=artifact_name_path, + unit_artifact_name=f"unit-{base}", + e2e_artifact_name=f"e2e-{base}", + coverage_artifact_name=f"coverage-{base}", + ) + ) + return results + + @dataclass class WorkflowStatus: """Status of a single GitHub Actions workflow run.""" @@ -58,6 +131,8 @@ class BatchFinished(BaseMessage): run_id: int workflow_url: str artifacts_path: str + timed_out: bool = False + batch_jobs: list[BatchJobResult] = field(default_factory=list) @dataclass diff --git a/ddev/src/ddev/cli/ci/tests/task_test_runner.py b/ddev/src/ddev/cli/ci/tests/task_test_runner.py index 101a5b6b892e0..6b02821e9fd81 100644 --- a/ddev/src/ddev/cli/ci/tests/task_test_runner.py +++ b/ddev/src/ddev/cli/ci/tests/task_test_runner.py @@ -11,10 +11,10 @@ from pathlib import Path from typing import Any, Literal -from ddev.cli.ci.tests.messages import BatchFinished, TestBatch +from ddev.cli.ci.tests.messages import BatchFinished, BatchJob, BatchJobResult, TestBatch from ddev.event_bus.orchestrator import AsyncProcessor from ddev.utils.github_async import AsyncGitHubClient, GitHubResponse -from ddev.utils.github_async.models import WorkflowRun +from ddev.utils.github_async.models import WorkflowJob, WorkflowRun def _conclusion_to_status(conclusion: str | None) -> Literal["success", "failure", "skipped"]: @@ -98,15 +98,19 @@ async def process_message(self, message: TestBatch) -> None: self._logger.warning("Workflow completed with null conclusion", extra=log_extra) final_conclusion = raw or "neutral" - artifacts_path = await self._download_artifacts(run_id, log_extra) + artifact_dirs = await self._download_artifacts(run_id, log_extra) self._logger.info("Artifacts downloaded", extra=log_extra) + jobs = await self._list_jobs(run_id, log_extra) + batch_jobs = BatchJobResult.correlate(message.job_list, jobs, artifact_dirs) + finished = BatchFinished( id=message.id, status=_conclusion_to_status(raw), run_id=run_id, workflow_url=workflow_url, - artifacts_path=str(artifacts_path), + artifacts_path=str(self._options.artifacts_base_path), + batch_jobs=batch_jobs, ) finally: try: @@ -134,16 +138,38 @@ async def _poll_until_complete(self, run_id: int, log_extra: dict[str, Any]) -> self._logger.info("Workflow completed", extra=log_extra) return run + async def _list_jobs(self, run_id: int, log_extra: dict[str, Any]) -> list[WorkflowJob]: + """Fetch the workflow run's jobs; on failure log a warning and return an empty list.""" + jobs: list[WorkflowJob] = [] + try: + async for page in self._client.list_workflow_jobs(self._options.owner, self._options.repo, run_id): + jobs.extend(page.data.jobs) + except Exception: + self._logger.warning("Failed to list workflow jobs", extra=log_extra, exc_info=True) + return jobs + def _build_inputs(self, message: TestBatch) -> dict[str, str]: return { "batch_id": message.id, "checkout_sha": self._options.checkout_sha, "integrations": json.dumps(message.integrations), - "job_list": json.dumps([dataclasses.asdict(job) for job in message.job_list]), + "job_list": json.dumps([self._job_input(job) for job in message.job_list]), } - async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> Path: - run_path = self._options.artifacts_base_path / str(run_id) + @staticmethod + def _job_input(job: BatchJob) -> dict[str, Any]: + """Serialize a job for the workflow, carrying the artifact name so all its files upload under + a single folder/zip named after it (matched later via ``BatchJob.artifact_name``).""" + return {**dataclasses.asdict(job), "artifact_name": job.artifact_name()} + + async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> dict[str, Path]: + """Download the run's artifacts and return an artifact-name -> path map. + + The map keys on the GitHub artifact name (the contract a ``BatchJob`` reproduces via + ``artifact_name``), letting the producer resolve each job's directory deterministically. + """ + base_path = self._options.artifacts_base_path + artifact_dirs: dict[str, Path] = {} failures: list[tuple[int, str]] = [] try: async for page in self._client.list_workflow_run_artifacts(self._options.owner, self._options.repo, run_id): @@ -164,9 +190,10 @@ async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> P extra=log_extra, ) continue - target = run_path / f"{artifact.id}-{artifact.name}" + target = base_path / artifact.name try: await self._client.download_artifact(artifact.archive_download_url, target) + artifact_dirs[artifact.name] = target self._logger.info("Downloaded artifact %s -> %s", artifact.id, target, extra=log_extra) except Exception as exc: self._logger.warning( @@ -186,4 +213,4 @@ async def _download_artifacts(self, run_id: int, log_extra: dict[str, Any]) -> P failures, extra=log_extra, ) - return run_path + return artifact_dirs diff --git a/ddev/src/ddev/utils/github_async/client.py b/ddev/src/ddev/utils/github_async/client.py index 8cfdc6bfda8eb..050968ad43ced 100644 --- a/ddev/src/ddev/utils/github_async/client.py +++ b/ddev/src/ddev/utils/github_async/client.py @@ -25,6 +25,7 @@ PullRequest, PullRequestReviewComment, WorkflowDispatchResult, + WorkflowJobsList, WorkflowRun, ) @@ -251,6 +252,34 @@ async def list_workflow_run_artifacts( async for response in self._paginated_request("GET", endpoint, timeout=timeout, params={"per_page": per_page}): yield self._parse_response(response, ArtifactsList) + async def list_workflow_jobs( + self, + owner: str, + repo: str, + run_id: int, + per_page: int = 30, + timeout: float | None = None, + ) -> AsyncIterator[GitHubResponse[WorkflowJobsList]]: + """ + Calls the GitHub API to list jobs for a workflow run (paginated). + + GitHub API Documentation: + https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run + + Args: + owner: Repository owner (user or organisation). + repo: Repository name. + run_id: Numeric ID of the workflow run. + per_page: Number of jobs per page (default 30, max 100). + timeout: Optional timeout for this specific request. Defaults to the client's default_timeout. + + Returns: + AsyncIterator[GitHubResponse[WorkflowJobsList]]: One page of jobs per iteration. + """ + endpoint = f"/repos/{owner}/{repo}/actions/runs/{run_id}/jobs" + async for response in self._paginated_request("GET", endpoint, timeout=timeout, params={"per_page": per_page}): + yield self._parse_response(response, WorkflowJobsList) + async def create_issue_comment( self, owner: str, diff --git a/ddev/src/ddev/utils/github_async/models/__init__.py b/ddev/src/ddev/utils/github_async/models/__init__.py index 6f498789eb153..719050c723ecd 100644 --- a/ddev/src/ddev/utils/github_async/models/__init__.py +++ b/ddev/src/ddev/utils/github_async/models/__init__.py @@ -34,7 +34,10 @@ from .user import GitHubUser as GitHubUser from .workflow import Artifact as Artifact from .workflow import ArtifactsList as ArtifactsList + from .workflow import JobStep as JobStep from .workflow import WorkflowDispatchResult as WorkflowDispatchResult + from .workflow import WorkflowJob as WorkflowJob + from .workflow import WorkflowJobsList as WorkflowJobsList from .workflow import WorkflowRun as WorkflowRun # Map of exported attribute name -> submodule (relative to this package) that @@ -45,11 +48,14 @@ 'CheckRun': 'check_run', 'GitHubUser': 'user', 'IssueComment': 'comment', + 'JobStep': 'workflow', 'Label': 'label', 'PullRequest': 'pull_request', 'PullRequestRef': 'pull_request', 'PullRequestReviewComment': 'comment', 'WorkflowDispatchResult': 'workflow', + 'WorkflowJob': 'workflow', + 'WorkflowJobsList': 'workflow', 'WorkflowRun': 'workflow', } diff --git a/ddev/src/ddev/utils/github_async/models/workflow.py b/ddev/src/ddev/utils/github_async/models/workflow.py index 41662a698803e..264dc500889d2 100644 --- a/ddev/src/ddev/utils/github_async/models/workflow.py +++ b/ddev/src/ddev/utils/github_async/models/workflow.py @@ -5,7 +5,7 @@ from __future__ import annotations -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field class WorkflowRun(BaseModel): @@ -50,3 +50,37 @@ class ArtifactsList(BaseModel): total_count: int artifacts: list[Artifact] + + +class JobStep(BaseModel): + """A single step within a GitHub Actions job.""" + + model_config = ConfigDict(extra="ignore") + + name: str + status: str + conclusion: str | None = None + number: int | None = None + + +class WorkflowJob(BaseModel): + """A single job within a GitHub Actions workflow run.""" + + model_config = ConfigDict(extra="ignore") + + id: int + run_id: int + name: str + status: str + conclusion: str | None = None + html_url: str | None = None + steps: list[JobStep] = Field(default_factory=list) + + +class WorkflowJobsList(BaseModel): + """A list of jobs with a total count.""" + + model_config = ConfigDict(extra="ignore") + + total_count: int + jobs: list[WorkflowJob] diff --git a/ddev/tests/cli/ci/tests/test_messages.py b/ddev/tests/cli/ci/tests/test_messages.py new file mode 100644 index 0000000000000..db8afba2180e1 --- /dev/null +++ b/ddev/tests/cli/ci/tests/test_messages.py @@ -0,0 +1,96 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Tests for the ci/tests pipeline messages.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from ddev.cli.ci.tests.messages import ARTIFACT_NAME_DISALLOWED, BatchJob, BatchJobResult +from ddev.utils.github_async.models import WorkflowJob + + +def batch_job( + name="job-1", + target="ntp", + runner="ubuntu-latest", + environment="py3.13", + platform="linux", + unit_tests=True, + e2e_tests=False, +) -> BatchJob: + return BatchJob( + name=name, + target=target, + runner=runner, + environment=environment, + platform=platform, + unit_tests=unit_tests, + e2e_tests=e2e_tests, + ) + + +def test_artifact_name_built_from_target_env_platform() -> None: + assert batch_job().artifact_name() == "ntp_py3.13_linux" + + +@pytest.mark.parametrize("field", ["name", "runner", "unit_tests", "e2e_tests"]) +def test_artifact_name_ignores_non_identifying_fields(field: str) -> None: + # name / runner / unit_tests / e2e_tests are not part of the artifact name. + changed = {"name": "other-job", "runner": "windows-latest", "unit_tests": False, "e2e_tests": True}[field] + assert batch_job(**{field: changed}).artifact_name() == batch_job().artifact_name() + + +@pytest.mark.parametrize( + ("field", "value"), + [("target", "kafka"), ("environment", "py3.12"), ("platform", "windows")], +) +def test_artifact_name_varies_with_identifying_fields(field: str, value: str) -> None: + assert batch_job(**{field: value}).artifact_name() != batch_job().artifact_name() + + +def test_artifact_name_sanitizes_disallowed_characters() -> None: + name = batch_job(target='a/b:c*d?e|f"gi\\j', environment="x\r\ny").artifact_name() + assert ARTIFACT_NAME_DISALLOWED.search(name) is None + + +def test_correlate_matches_jobs_and_artifacts(tmp_path: Path) -> None: + job = batch_job("j1") + base = job.artifact_name() + artifact_dir = tmp_path / base + artifact_dir.mkdir() + workflow_job = WorkflowJob(id=1, run_id=123, name="j1", status="completed", conclusion="success") + + [result] = BatchJobResult.correlate([job], [workflow_job], {base: artifact_dir}) + + assert result.job == job + assert result.workflow_job is workflow_job + assert result.artifact_name_path == str(artifact_dir) + assert result.unit_artifact_name == f"unit-{base}" + assert result.e2e_artifact_name == f"e2e-{base}" + assert result.coverage_artifact_name == f"coverage-{base}" + + +def test_correlate_without_workflow_or_artifact_match() -> None: + # A job absent from the workflow API and with no matching artifact folder still yields a + # well-formed result whose correlated facets are None. + job = batch_job("j1") + + [result] = BatchJobResult.correlate([job], [], {}) + + assert result.job == job + assert result.workflow_job is None + assert result.artifact_name_path is None + + +def test_correlate_ignores_artifact_dir_missing_on_disk(tmp_path: Path) -> None: + # A mapped path that does not exist on disk is not recorded. + job = batch_job("j1") + base = job.artifact_name() + + [result] = BatchJobResult.correlate([job], [], {base: tmp_path / base}) + + assert result.artifact_name_path is None diff --git a/ddev/tests/cli/ci/tests/test_task_test_runner.py b/ddev/tests/cli/ci/tests/test_task_test_runner.py index 603b6839049fd..8313b88ce6812 100644 --- a/ddev/tests/cli/ci/tests/test_task_test_runner.py +++ b/ddev/tests/cli/ci/tests/test_task_test_runner.py @@ -19,6 +19,8 @@ from ddev.utils.github_async.models import ( Artifact, ArtifactsList, + WorkflowJob, + WorkflowJobsList, WorkflowRun, ) from tests.helpers.github_async import FakeAsyncGitHubClient @@ -28,27 +30,27 @@ # --------------------------------------------------------------------------- -def _wrap(data: Any) -> GitHubResponse[Any]: +def wrap(data: Any) -> GitHubResponse[Any]: return GitHubResponse(data=data, headers={}) -def _job(name: str = "job-1") -> BatchJob: +def make_job(name: str = "job-1", environment: str = "py3.13") -> BatchJob: return BatchJob( name=name, target="ntp", runner="ubuntu-latest", - environment="py3.13", + environment=environment, platform="linux", unit_tests=True, e2e_tests=False, ) -_DEFAULT_URL = object() +DEFAULT_URL = object() -def _artifact(idx: int, expired: bool = False, archive_download_url: Any = _DEFAULT_URL) -> Artifact: - url = f"https://api.github.com/artifact/{idx}/zip" if archive_download_url is _DEFAULT_URL else archive_download_url +def make_artifact(idx: int, expired: bool = False, archive_download_url: Any = DEFAULT_URL) -> Artifact: + url = f"https://api.github.com/artifact/{idx}/zip" if archive_download_url is DEFAULT_URL else archive_download_url return Artifact( id=idx, name=f"artifact-{idx}", @@ -59,7 +61,7 @@ def _artifact(idx: int, expired: bool = False, archive_download_url: Any = _DEFA ) -def _workflow_run(status: str = "completed", conclusion: str | None = "success") -> WorkflowRun: +def make_workflow_run(status: str = "completed", conclusion: str | None = "success") -> WorkflowRun: return WorkflowRun( id=123, name="test-batch", @@ -69,15 +71,29 @@ def _workflow_run(status: str = "completed", conclusion: str | None = "success") ) -def _artifacts_page(artifacts: list[Artifact]) -> GitHubResponse[ArtifactsList]: - return _wrap(ArtifactsList(total_count=len(artifacts), artifacts=list(artifacts))) +def artifacts_page(artifacts: list[Artifact]) -> GitHubResponse[ArtifactsList]: + return wrap(ArtifactsList(total_count=len(artifacts), artifacts=list(artifacts))) -def _mock_artifacts(fake: FakeAsyncGitHubClient, artifacts: list[Artifact]) -> None: - fake.mock_response("list_workflow_run_artifacts", _artifacts_page(artifacts)) +def mock_artifacts(fake: FakeAsyncGitHubClient, artifacts: list[Artifact]) -> None: + fake.mock_response("list_workflow_run_artifacts", artifacts_page(artifacts)) -def _make_runner(client: FakeAsyncGitHubClient, tmp_path: Path) -> TaskTestRunner: +def make_artifact_for(idx: int, job: BatchJob) -> Artifact: + """Artifact whose name matches a job's deterministic artifact name (the upload/download contract).""" + artifact = make_artifact(idx) + return artifact.model_copy(update={"name": job.artifact_name()}) + + +def make_workflow_job(name: str, conclusion: str = "success") -> WorkflowJob: + return WorkflowJob(id=1, run_id=123, name=name, status="completed", conclusion=conclusion) + + +def mock_jobs(fake: FakeAsyncGitHubClient, jobs: list[WorkflowJob]) -> None: + fake.mock_response("list_workflow_jobs", wrap(WorkflowJobsList(total_count=len(jobs), jobs=list(jobs)))) + + +def make_runner(client: FakeAsyncGitHubClient, tmp_path: Path) -> TaskTestRunner: options = TestRunnerOptions( owner="DataDog", repo="integrations-core", @@ -97,15 +113,38 @@ def _make_runner(client: FakeAsyncGitHubClient, tmp_path: Path) -> TaskTestRunne return runner -def _drain_queue(queue: asyncio.Queue[BaseMessage]) -> list[BaseMessage]: +def drain_queue(queue: asyncio.Queue[BaseMessage]) -> list[BaseMessage]: out: list[BaseMessage] = [] while not queue.empty(): out.append(queue.get_nowait()) return out -def _batch(batch_id: str = "batch-err") -> TestBatch: - return TestBatch(id=batch_id, job_list=[_job()], jobs_count=1, integrations=["ntp"]) +def make_batch(batch_id: str = "batch-err") -> TestBatch: + return TestBatch(id=batch_id, job_list=[make_job()], jobs_count=1, integrations=["ntp"]) + + +async def run_happy_path(tmp_path: Path) -> tuple[FakeAsyncGitHubClient, BatchFinished]: + """Run a clean two-job batch through the runner once and return the client and the BatchFinished. + + The two jobs share a target/environment/platform, so their artifact names collide with each other + and never match the generic ``artifact-N`` uploads: correlation therefore finds no match. + """ + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1), make_artifact(2)]) + runner = make_runner(fake, tmp_path) + + batch = TestBatch( + id="batch-1", job_list=[make_job("j1"), make_job("j2")], jobs_count=2, integrations=["ntp", "kafka"] + ) + await runner.process_message(batch) + + submitted = drain_queue(runner.queue) + assert len(submitted) == 1 + finished = submitted[0] + assert isinstance(finished, BatchFinished) + return fake, finished # --------------------------------------------------------------------------- @@ -131,21 +170,14 @@ def test_conclusion_to_status(conclusion: str | None, expected: str) -> None: # --------------------------------------------------------------------------- -# process_message +# process_message — happy path (one concern per test) # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_process_message_happy_path(tmp_path: Path) -> None: - fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1), _artifact(2)]) - runner = _make_runner(fake, tmp_path) +async def test_dispatches_workflow_with_job_list_payload(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) - batch = TestBatch(id="batch-1", job_list=[_job("j1"), _job("j2")], jobs_count=2, integrations=["ntp", "kafka"]) - await runner.process_message(batch) - - # Dispatch once, with the right shape. dispatch_calls = fake.calls_to("create_workflow_dispatch") assert len(dispatch_calls) == 1 assert dispatch_calls[0].kwargs == { @@ -168,6 +200,7 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: "platform": "linux", "unit_tests": True, "e2e_tests": False, + "artifact_name": "ntp_py3.13_linux", }, { "name": "j2", @@ -177,13 +210,18 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: "platform": "linux", "unit_tests": True, "e2e_tests": False, + "artifact_name": "ntp_py3.13_linux", }, ] ), }, } - # Check run opened with the PR head SHA, in_progress, with the workflow URL as details_url. + +@pytest.mark.asyncio +async def test_opens_check_run_with_head_sha_and_details_url(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) + create_calls = fake.calls_to("create_check_run") assert len(create_calls) == 1 cr = create_calls[0].kwargs @@ -192,30 +230,56 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: assert cr["name"] == "test-batch/batch-1" assert cr["details_url"] == "https://github.com/o/r/actions/runs/123" - # Both artifacts downloaded under //- (collision-safe path). + +@pytest.mark.asyncio +async def test_downloads_all_batch_artifacts(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) + download_calls = fake.calls_to("download_artifact") assert len(download_calls) == 2 assert (download_calls[0].kwargs["archive_download_url"], download_calls[0].kwargs["dest_path"]) == ( "https://api.github.com/artifact/1/zip", - tmp_path / "123" / "1-artifact-1", + tmp_path / "artifact-1", ) assert (download_calls[1].kwargs["archive_download_url"], download_calls[1].kwargs["dest_path"]) == ( "https://api.github.com/artifact/2/zip", - tmp_path / "123" / "2-artifact-2", + tmp_path / "artifact-2", ) - # Exactly one BatchFinished message submitted with the right fields. - submitted = _drain_queue(runner.queue) - assert len(submitted) == 1 - finished = submitted[0] - assert isinstance(finished, BatchFinished) + +@pytest.mark.asyncio +async def test_emits_batch_finished_with_run_metadata(tmp_path: Path) -> None: + _, finished = await run_happy_path(tmp_path) + assert finished.id == "batch-1" assert finished.status == "success" assert finished.run_id == 123 assert finished.workflow_url == "https://github.com/o/r/actions/runs/123" - assert finished.artifacts_path == str(tmp_path / "123") + assert finished.artifacts_path == str(tmp_path) + + +@pytest.mark.asyncio +async def test_batch_finished_records_unmatched_correlation_when_no_match(tmp_path: Path) -> None: + # The two jobs' artifact names collide and don't match the generic artifacts, and there is no + # jobs API match, so both correlated facets are None while the per-facet file names are recorded. + _, finished = await run_happy_path(tmp_path) + + assert [r.job.name for r in finished.batch_jobs] == ["j1", "j2"] + assert all(r.workflow_job is None and r.artifact_name_path is None for r in finished.batch_jobs) + + first = finished.batch_jobs[0] + base = make_job("j1").artifact_name() + assert (first.unit_artifact_name, first.e2e_artifact_name, first.coverage_artifact_name) == ( + f"unit-{base}", + f"e2e-{base}", + f"coverage-{base}", + ) + + +@pytest.mark.asyncio +async def test_closes_check_run_with_workflow_conclusion(tmp_path: Path) -> None: + fake, _ = await run_happy_path(tmp_path) - # Check run closed with the GitHub conclusion. update_calls = fake.calls_to("update_check_run") assert len(update_calls) == 1 upd = update_calls[0].kwargs @@ -224,16 +288,97 @@ async def test_process_message_happy_path(tmp_path: Path) -> None: assert upd["conclusion"] == "success" +# --------------------------------------------------------------------------- +# process_message — correlation +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_process_message_correlates_batch_jobs(tmp_path: Path) -> None: + # A failed multi-job run where j1 passed and j2 failed: each batch_jobs entry must carry its + # own true per-job status and its artifact directory, resolved by the job's artifact name. + # The two jobs differ in an artifact-relevant field (environment) so their base names differ. + j1, j2 = make_job("j1", environment="py3.13"), make_job("j2", environment="py3.12") + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "failure")) + mock_artifacts(fake, [make_artifact_for(1, j1), make_artifact_for(2, j2)]) + mock_jobs(fake, [make_workflow_job("j1", "success"), make_workflow_job("j2", "failure")]) + runner = make_runner(fake, tmp_path) + + await runner.process_message(TestBatch(id="batch-c", job_list=[j1, j2], jobs_count=2, integrations=["ntp"])) + + finished = drain_queue(runner.queue)[0] + assert isinstance(finished, BatchFinished) + assert finished.status == "failure" + + results = {r.job.name: r for r in finished.batch_jobs} + assert set(results) == {"j1", "j2"} + # Passing job is not marked failed; each carries its true workflow-run conclusion. + assert results["j1"].workflow_job is not None and results["j1"].workflow_job.conclusion == "success" + assert results["j2"].workflow_job is not None and results["j2"].workflow_job.conclusion == "failure" + # Each job's single artifact folder is resolved by its base artifact name (no heuristic matching). + assert results["j1"].artifact_name_path == str(tmp_path / j1.artifact_name()) + assert results["j2"].artifact_name_path == str(tmp_path / j2.artifact_name()) + # The per-facet file names inside each folder are recorded from the base artifact name. + assert results["j1"].unit_artifact_name == f"unit-{j1.artifact_name()}" + assert results["j2"].coverage_artifact_name == f"coverage-{j2.artifact_name()}" + + +@pytest.mark.asyncio +async def test_process_message_batch_job_without_workflow_match(tmp_path: Path) -> None: + # A job present in the batch but absent from the workflow-run API response still yields a + # well-formed entry: its artifact is located but workflow_job is None. + job = make_job("j1") + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact_for(1, job)]) + # list_workflow_jobs defaults to an empty page. + runner = make_runner(fake, tmp_path) + + await runner.process_message(TestBatch(id="batch-d", job_list=[job], jobs_count=1, integrations=["ntp"])) + + finished = drain_queue(runner.queue)[0] + assert isinstance(finished, BatchFinished) + [result] = finished.batch_jobs + assert result.job == job + assert result.workflow_job is None + assert result.artifact_name_path == str(tmp_path / job.artifact_name()) + + +@pytest.mark.asyncio +async def test_process_message_batch_job_without_artifacts(tmp_path: Path) -> None: + # A job with no artifacts on disk still yields a well-formed entry with artifact_name_path None. + job = make_job("j1") + fake = FakeAsyncGitHubClient() + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, []) + mock_jobs(fake, [make_workflow_job("j1", "success")]) + runner = make_runner(fake, tmp_path) + + await runner.process_message(TestBatch(id="batch-e", job_list=[job], jobs_count=1, integrations=["ntp"])) + + finished = drain_queue(runner.queue)[0] + assert isinstance(finished, BatchFinished) + [result] = finished.batch_jobs + assert result.workflow_job is not None and result.workflow_job.conclusion == "success" + assert result.artifact_name_path is None + + +# --------------------------------------------------------------------------- +# process_message — conclusions and resilience +# --------------------------------------------------------------------------- + + @pytest.mark.asyncio async def test_process_message_failure_path(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "failure")) - _mock_artifacts(fake, [_artifact(1)]) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "failure")) + mock_artifacts(fake, [make_artifact(1)]) + runner = make_runner(fake, tmp_path) - await runner.process_message(TestBatch(id="batch-2", job_list=[_job()], jobs_count=1, integrations=["ntp"])) + await runner.process_message(TestBatch(id="batch-2", job_list=[make_job()], jobs_count=1, integrations=["ntp"])) - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -248,14 +393,14 @@ async def test_process_message_failure_path(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_skipped_conclusion(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "skipped")) - _mock_artifacts(fake, []) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "skipped")) + mock_artifacts(fake, []) + runner = make_runner(fake, tmp_path) - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # A "skipped" GitHub conclusion maps to a "skipped" BatchFinished and a "skipped" check run. - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -271,14 +416,14 @@ async def test_process_message_polls_until_completed(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() # Initial get + polls until "completed"; FIFO one-shots replay in order. for status in ("queued", "in_progress", "in_progress", "completed"): - fake.mock_response("get_workflow_run", _workflow_run(status, "success"), once=True) - _mock_artifacts(fake, []) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run(status, "success"), once=True) + mock_artifacts(fake, []) + runner = make_runner(fake, tmp_path) - await runner.process_message(TestBatch(id="batch-3", job_list=[_job()], jobs_count=1, integrations=["ntp"])) + await runner.process_message(TestBatch(id="batch-3", job_list=[make_job()], jobs_count=1, integrations=["ntp"])) assert len(fake.calls_to("get_workflow_run")) == 4 - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 assert isinstance(submitted[0], BatchFinished) assert submitted[0].status == "success" @@ -287,18 +432,18 @@ async def test_process_message_polls_until_completed(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_skips_expired_artifacts(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts( + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts( fake, [ - _artifact(1), - _artifact(2, expired=True), - _artifact(3, archive_download_url=None), + make_artifact(1), + make_artifact(2, expired=True), + make_artifact(3, archive_download_url=None), ], ) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) - await runner.process_message(TestBatch(id="batch-4", job_list=[_job()], jobs_count=1, integrations=["ntp"])) + await runner.process_message(TestBatch(id="batch-4", job_list=[make_job()], jobs_count=1, integrations=["ntp"])) # Only the non-expired artifact with a download URL should be fetched. download_calls = fake.calls_to("download_artifact") @@ -309,14 +454,14 @@ async def test_process_message_skips_expired_artifacts(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_null_conclusion(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", None)) - _mock_artifacts(fake, []) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", None)) + mock_artifacts(fake, []) + runner = make_runner(fake, tmp_path) - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # A null GitHub conclusion maps to a "failure" BatchFinished and a "neutral" check run. - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -330,19 +475,19 @@ async def test_process_message_null_conclusion(tmp_path: Path) -> None: @pytest.mark.asyncio async def test_process_message_emits_batch_finished_when_listing_artifacts_fails(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) fake.mock_response("list_workflow_run_artifacts", RuntimeError("boom-list-artifacts")) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) # A failure listing artifacts must not abort the batch: the check run is still closed # and exactly one BatchFinished is emitted with the workflow's real conclusion. - await runner.process_message(_batch()) + await runner.process_message(make_batch()) update_calls = fake.calls_to("update_check_run") assert len(update_calls) == 1 assert update_calls[0].kwargs["conclusion"] == "success" - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 finished = submitted[0] assert isinstance(finished, BatchFinished) @@ -352,16 +497,16 @@ async def test_process_message_emits_batch_finished_when_listing_artifacts_fails @pytest.mark.asyncio async def test_process_message_swallows_check_run_close_failure(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1)]) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1)]) fake.mock_response("update_check_run", RuntimeError("boom-close")) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) # A failure closing the check run must not propagate or suppress the BatchFinished. - await runner.process_message(_batch()) + await runner.process_message(make_batch()) assert len(fake.calls_to("update_check_run")) == 1 - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 assert isinstance(submitted[0], BatchFinished) assert submitted[0].status == "success" @@ -370,16 +515,16 @@ async def test_process_message_swallows_check_run_close_failure(tmp_path: Path) @pytest.mark.asyncio async def test_download_failure_for_one_artifact_does_not_abort_others(tmp_path: Path) -> None: fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1), _artifact(2), _artifact(3)]) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1), make_artifact(2), make_artifact(3)]) fake.mock_response( "download_artifact", RuntimeError("download failure for artifact 2"), archive_download_url="https://api.github.com/artifact/2/zip", ) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # All three were attempted; the failure for #2 didn't abort #3. urls = [call.kwargs["archive_download_url"] for call in fake.calls_to("download_artifact")] @@ -388,7 +533,7 @@ async def test_download_failure_for_one_artifact_does_not_abort_others(tmp_path: "https://api.github.com/artifact/2/zip", "https://api.github.com/artifact/3/zip", ] - submitted = _drain_queue(runner.queue) + submitted = drain_queue(runner.queue) assert len(submitted) == 1 assert isinstance(submitted[0], BatchFinished) assert submitted[0].status == "success" @@ -409,10 +554,10 @@ async def test_failure_before_check_run_opens_does_not_create_check_run(tmp_path fake.mock_response("create_workflow_dispatch", boom) else: fake.mock_response("get_workflow_run", boom) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) with pytest.raises(RuntimeError, match=f"boom-{failure_point}"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # The check run is never opened, so there is nothing to close. fake.assert_not_called("create_check_run") @@ -424,12 +569,12 @@ async def test_failure_mid_poll_closes_check_run_as_cancelled(tmp_path: Path) -> boom = RuntimeError("boom-mid-poll") fake = FakeAsyncGitHubClient() # Initial get succeeds (still running), the first poll raises. - fake.mock_response("get_workflow_run", _workflow_run("queued"), once=True) + fake.mock_response("get_workflow_run", make_workflow_run("queued"), once=True) fake.mock_response("get_workflow_run", boom, once=True) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) with pytest.raises(RuntimeError, match="boom-mid-poll"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # The check run was opened and the finally closed it as cancelled. assert len(fake.calls_to("create_check_run")) == 1 @@ -443,12 +588,12 @@ async def test_failure_mid_poll_closes_check_run_as_cancelled(tmp_path: Path) -> async def test_failure_at_create_check_run_does_not_close_check_run(tmp_path: Path) -> None: boom = RuntimeError("boom-create-check-run") fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) fake.mock_response("create_check_run", boom) - runner = _make_runner(fake, tmp_path) + runner = make_runner(fake, tmp_path) with pytest.raises(RuntimeError, match="boom-create-check-run"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # The open was attempted but failed before the try/finally, so there is no check run to close. assert len(fake.calls_to("create_check_run")) == 1 @@ -459,9 +604,9 @@ async def test_failure_at_create_check_run_does_not_close_check_run(tmp_path: Pa async def test_failure_at_submit_message_closes_check_run_as_success(tmp_path: Path) -> None: boom = RuntimeError("boom-submit-message") fake = FakeAsyncGitHubClient() - fake.mock_response("get_workflow_run", _workflow_run("completed", "success")) - _mock_artifacts(fake, [_artifact(1)]) - runner = _make_runner(fake, tmp_path) + fake.mock_response("get_workflow_run", make_workflow_run("completed", "success")) + mock_artifacts(fake, [make_artifact(1)]) + runner = make_runner(fake, tmp_path) class _BoomQueue: def put_nowait(self, _: Any) -> None: @@ -470,7 +615,7 @@ def put_nowait(self, _: Any) -> None: runner.queue = _BoomQueue() # type: ignore[assignment] with pytest.raises(RuntimeError, match="boom-submit-message"): - await runner.process_message(_batch()) + await runner.process_message(make_batch()) # Workflow completed cleanly; the finally closed the check run as success before submit raised. assert len(fake.calls_to("create_check_run")) == 1 diff --git a/ddev/tests/helpers/github_async.py b/ddev/tests/helpers/github_async.py index 2986a7bbf44b4..0ff79c53421e4 100644 --- a/ddev/tests/helpers/github_async.py +++ b/ddev/tests/helpers/github_async.py @@ -37,6 +37,7 @@ def test_thing(fake_async_github): from collections.abc import AsyncIterator, Callable from dataclasses import dataclass, field +from pathlib import Path from typing import Any import httpx @@ -48,6 +49,7 @@ def test_thing(fake_async_github): Label, PullRequest, WorkflowDispatchResult, + WorkflowJobsList, WorkflowRun, ) @@ -127,6 +129,11 @@ def _default_response_factories() -> dict[str, Callable[[], Any]]: data=ArtifactsList(total_count=0, artifacts=[]), headers={}, ), + # An empty page; tests that need jobs register their own WorkflowJobsList. + 'list_workflow_jobs': lambda: GitHubResponse( + data=WorkflowJobsList(total_count=0, jobs=[]), + headers={}, + ), # Download is a side-effecting no-op by default; per-URL failures are registered explicitly. 'download_artifact': lambda: None, } @@ -357,6 +364,36 @@ async def list_workflow_run_artifacts( else: yield GitHubResponse.model_validate({'data': page, 'headers': {}}) + async def list_workflow_jobs( + self, + owner: str, + repo: str, + run_id: int, + per_page: int = 30, + timeout: float | None = None, + ) -> AsyncIterator[GitHubResponse[WorkflowJobsList]]: + """Async-generator mirror. A registered response may be a single page or a list of pages.""" + self._record( + 'list_workflow_jobs', + owner=owner, + repo=repo, + run_id=run_id, + per_page=per_page, + timeout=timeout, + ) + response = self._resolve_response( + 'list_workflow_jobs', + {'owner': owner, 'repo': repo, 'run_id': run_id, 'per_page': per_page, 'timeout': timeout}, + ) + if isinstance(response, BaseException): + raise response + pages = response if isinstance(response, list) else [response] + for page in pages: + if isinstance(page, GitHubResponse): + yield page + else: + yield GitHubResponse.model_validate({'data': page, 'headers': {}}) + async def download_artifact( self, archive_download_url: str, @@ -376,6 +413,7 @@ async def download_artifact( ) if isinstance(response, BaseException): raise response + Path(dest_path).mkdir(parents=True, exist_ok=True) return None async def aclose(self) -> None: From 568419fc10ece3e477ae329552ca65b97c036b95 Mon Sep 17 00:00:00 2001 From: Piotr WOLSKI Date: Fri, 3 Jul 2026 14:25:39 +0200 Subject: [PATCH 3/6] kafka_consumer: add Confluent Cloud Connect support (#24033) * kafka_consumer: add Confluent Cloud Connect support Collect connector metrics and configuration events from the Confluent Cloud Connect REST API via the kafka_connect_confluent_cloud_environment_id, kafka_connect_confluent_cloud_cluster_id, and kafka_connect_confluent_cloud_url options. Co-Authored-By: Claude Opus 4.8 (1M context) * kafka_consumer: stop placeholder Confluent Cloud IDs becoming config defaults Add display_default: null to kafka_connect_confluent_cloud_environment_id and kafka_connect_confluent_cloud_cluster_id so the example IDs are not baked into defaults.py and conf.yaml.example. Also document that authentication uses kafka_connect_username/password (Confluent Cloud API key/secret). Co-Authored-By: Claude Opus 4.8 (1M context) * kafka_consumer: harden Confluent Cloud URL fallback and dedupe gating predicate Coerce an explicit-null kafka_connect_confluent_cloud_url to the default base URL to avoid AttributeError on None.rstrip(). Add a confluent_cloud_configured property on KafkaConfig and use it in both _collect_connect_status and _confluent_cloud_key so the gating rule lives in one place. Co-Authored-By: Claude Opus 4.8 (1M context) * kafka_consumer: test Confluent Cloud path emits metrics and config events Add test_confluent_cloud_emits_metrics_and_config_events asserting that with Confluent Cloud configured the run emits kafka.connector.count=2 and connector config events whose connect_url targets the Confluent Cloud endpoint. Co-Authored-By: Claude Opus 4.8 (1M context) * kafka_consumer: support Confluent Cloud Connect generically via response-driven expand fallback Remove the Confluent-Cloud-specific config options and collection code. Confluent Cloud's Connect REST API is OSS-shaped and reachable by pointing kafka_connect_url at the Cloud base path with a Cloud API key as basic auth, so it needs no special handling beyond tolerating its single-expand behavior. Make _collect_rest response-driven: it issues one combined expand=info&status request and, only when a section comes back null (as Confluent Cloud does, honoring one expand value per request), fetches the missing section and merges it. OSS workers return both sections in the combined call, so behavior and request count are unchanged for self-hosted Connect. Also tolerate explicit null info/status values when emitting metrics and config events. Co-Authored-By: Claude Opus 4.8 (1M context) * Apply review fixes for Confluent Cloud Connect support - Guard None connector entries in both _emit_connector_metrics and _emit_connector_config_events so a null entry no longer crashes a URL's whole collection. - _merge_expand: only assign a section when the fallback value is not None, avoiding overwriting with an explicit None. - Supplementary expand fetches now degrade gracefully: each is wrapped in try/except that logs a warning and emits partial data instead of marking the endpoint unreachable. - Add type hints to the new _merge_expand method and _fetch closure. - Clarify the Confluent Cloud auth note in spec.yaml: API key (ID) goes in kafka_connect_username and the secret in kafka_connect_password; regenerate conf.yaml.example. - Strengthen test_single_expand_sections_merged to assert the merged info surfaces in the config event, and add an OSS single-request invariant test. Co-Authored-By: Claude Opus 4.8 (1M context) * kafka_consumer: condense verbose connector comments to one line Co-Authored-By: Claude Opus 4.8 (1M context) * kafka_consumer: retry combined expand as single value for Confluent Cloud Confluent Cloud's Connect v1 expansions endpoint ignores a combined expand list (serialized as repeated query params) and returns the unexpanded connector-name list instead, so the response fell into the pre-2.3 bail-out path before the single-expand fallback could run. Retry with one expand value before giving up. --------- Co-authored-by: Claude Opus 4.8 (1M context) --- kafka_consumer/assets/configuration/spec.yaml | 4 + kafka_consumer/changelog.d/24033.added | 1 + .../kafka_consumer/connectors.py | 71 ++++++++--- .../kafka_consumer/data/conf.yaml.example | 4 + .../kafka_consumer/kafka_consumer.py | 2 +- kafka_consumer/tests/test_connectors.py | 115 ++++++++++++++++++ 6 files changed, 179 insertions(+), 18 deletions(-) create mode 100644 kafka_consumer/changelog.d/24033.added diff --git a/kafka_consumer/assets/configuration/spec.yaml b/kafka_consumer/assets/configuration/spec.yaml index 2a8e6cc9b8929..45cc95e6a5364 100644 --- a/kafka_consumer/assets/configuration/spec.yaml +++ b/kafka_consumer/assets/configuration/spec.yaml @@ -470,6 +470,10 @@ files: Kafka Connect REST API URL or list of URLs. When set with enable_cluster_monitoring, collects connector metrics and configuration events from the Kafka Connect REST API. Accepts a single URL string or a list of URLs to monitor multiple Connect clusters. + For Confluent Cloud, set this to + ``https://api.confluent.cloud/connect/v1/environments//clusters/`` and supply the + Cloud API key (the key ID) as ``kafka_connect_username`` and its API secret as + ``kafka_connect_password``. fleet_configurable: true formats: - url diff --git a/kafka_consumer/changelog.d/24033.added b/kafka_consumer/changelog.d/24033.added new file mode 100644 index 0000000000000..56e647762a612 --- /dev/null +++ b/kafka_consumer/changelog.d/24033.added @@ -0,0 +1 @@ +Support monitoring Confluent Cloud managed connectors by pointing kafka_connect_url at the Confluent Cloud Connect REST API. diff --git a/kafka_consumer/datadog_checks/kafka_consumer/connectors.py b/kafka_consumer/datadog_checks/kafka_consumer/connectors.py index 3ae0bd7be9762..a857fa75cb07c 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/connectors.py +++ b/kafka_consumer/datadog_checks/kafka_consumer/connectors.py @@ -138,8 +138,7 @@ def _refresh_oauth_token(self) -> None: def collect(self, cluster_id: str) -> dict[str, bool]: """Collect connector data and return connectivity status per endpoint. - Returns a mapping of endpoint identifier to connection success (True/False). - REST URL endpoints use the URL as key; Confluent Cloud uses 'confluent_cloud::'. + Returns a mapping of endpoint URL to connection success (True/False). """ if self.config._kafka_connect_oauth_token_provider: try: @@ -164,18 +163,28 @@ def collect(self, cluster_id: str) -> dict[str, bool]: return connectivity def _collect_rest(self, url: str, cluster_id: str) -> None: - response = self.http.get( - f'{url.rstrip("/")}/connectors', - params={'expand': ['info', 'status']}, - timeout=self.config._request_timeout, - **self._get_request_kwargs(), - ) - response.raise_for_status() - connectors_data = response.json() + endpoint = f'{url.rstrip("/")}/connectors' + + def _fetch(expand: str | list[str]) -> Any: + response = self.http.get( + endpoint, + params={'expand': expand}, + timeout=self.config._request_timeout, + **self._get_request_kwargs(), + ) + response.raise_for_status() + return response.json() + + connectors_data = _fetch(['info', 'status']) + + if not isinstance(connectors_data, dict): + # A combined expand list is serialized as repeated query params (expand=info&expand=status), + # which Confluent Cloud's expansions endpoint doesn't honor — it returns the unexpanded + # connector-name list instead. Retry with a single expand value before concluding the + # worker predates Kafka 2.3 / CP 5.3 and doesn't support expand at all. + connectors_data = _fetch('info') if not isinstance(connectors_data, dict): - # Older Connect workers (pre-Kafka 2.3 / CP 5.3) ignore the expand parameter - # and return a plain list of connector names instead of the expanded dict. self.log.warning( "Unexpected response shape from %s/connectors (got %s, expected dict). " "The Connect worker may not support the expand parameter — Kafka 2.3+ / CP 5.3+ is required.", @@ -184,17 +193,43 @@ def _collect_rest(self, url: str, cluster_id: str) -> None: ) return + # Confluent Cloud honors only one expand value per request; fetch any missing section separately and merge. + for section in ('info', 'status'): + if any((entry or {}).get(section) is None for entry in connectors_data.values()): + try: + self._merge_expand(connectors_data, _fetch(section), section) + except Exception as e: + self.log.warning( + "Failed to fetch supplementary '%s' section from %s/connectors, emitting partial data: %s", + section, + url, + e, + ) + tags_base = self.config._get_tags(cluster_id) + [f'connect_url:{url}'] self.check.gauge('connector.count', len(connectors_data), tags=tags_base) - self._emit_connector_metrics(connectors_data, tags_base) self._emit_connector_config_events(connectors_data, cluster_id, url) self._collect_plugins(url, cluster_id) + @staticmethod + def _merge_expand(connectors_data: dict[str, Any], expanded: Any, section: str) -> None: + if not isinstance(expanded, dict): + return + for name, entry in connectors_data.items(): + if entry is None: + continue + if entry.get(section) is None and isinstance(expanded.get(name), dict): + val = expanded[name].get(section) + if val is not None: + entry[section] = val + def _emit_connector_metrics(self, connectors_data: dict[str, Any], tags_base: list[str]) -> None: for name, data in connectors_data.items(): - info = data.get('info', {}) - status = data.get('status', {}) + if data is None: + continue + info = data.get('info') or {} + status = data.get('status') or {} connector_type = info.get('type', 'unknown') full_class = (info.get('config') or {}).get('connector.class', '') @@ -230,8 +265,10 @@ def _emit_connector_config_events(self, connectors_data: dict[str, Any], cluster connector_contents: dict[str, str] = {} for name, data in connectors_data.items(): - info = data.get('info', {}) - status = data.get('status', {}) + if data is None: + continue + info = data.get('info') or {} + status = data.get('status') or {} connector_type = info.get('type', 'unknown') connector_state = (status.get('connector') or {}).get('state', 'UNKNOWN') diff --git a/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example b/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example index 5c28c3b688e71..c58fedc9b0faa 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example +++ b/kafka_consumer/datadog_checks/kafka_consumer/data/conf.yaml.example @@ -460,6 +460,10 @@ instances: ## Kafka Connect REST API URL or list of URLs. When set with enable_cluster_monitoring, ## collects connector metrics and configuration events from the Kafka Connect REST API. ## Accepts a single URL string or a list of URLs to monitor multiple Connect clusters. + ## For Confluent Cloud, set this to + ## ``https://api.confluent.cloud/connect/v1/environments//clusters/`` and supply the + ## Cloud API key (the key ID) as ``kafka_connect_username`` and its API secret as + ## ``kafka_connect_password``. # # kafka_connect_url: http://localhost:8083 diff --git a/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py b/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py index ad95ea27b2958..9cede0b9d5fc1 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py +++ b/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py @@ -185,7 +185,7 @@ def _send_cluster_monitoring_connection_error(self, reason: str) -> None: ) def _collect_connect_status(self, cluster_id: str) -> dict[str, bool] | None: - """Collect connector status for all configured Connect URLs, or None if unconfigured.""" + """Collect connector status for all configured Connect endpoints, or None if unconfigured.""" if not self.config._kafka_connect_urls: return None try: diff --git a/kafka_consumer/tests/test_connectors.py b/kafka_consumer/tests/test_connectors.py index d9d807ce439ba..11e8251986eca 100644 --- a/kafka_consumer/tests/test_connectors.py +++ b/kafka_consumer/tests/test_connectors.py @@ -314,3 +314,118 @@ def test_connector_metrics_match_metadata(run_connect_check, aggregator): run_connect_check(connectors_response=SAMPLE_CONNECTORS_RESPONSE) aggregator.assert_metrics_using_metadata(get_metadata_metrics()) + + +# --------------------------------------------------------------------------- +# Single-expand fallback (e.g. Confluent Cloud) +# --------------------------------------------------------------------------- + +CONFLUENT_CLOUD_URL = 'https://api.confluent.cloud/connect/v1/environments/env-abc123/clusters/lkc-xyz789' + + +def test_single_expand_sections_merged(run_connect_check, aggregator): + """A Connect endpoint honoring one expand value per request still yields complete metrics.""" + info_only = { + 'my-conn': { + 'info': { + 'type': 'source', + 'config': {'connector.class': 'io.confluent.SomeSource'}, + }, + 'status': None, + } + } + status_only = { + 'my-conn': { + 'status': { + 'connector': {'state': 'RUNNING'}, + 'tasks': [{'id': 0, 'state': 'RUNNING'}], + 'type': 'source', + } + } + } + + def get(url, **kwargs): + response = mock.MagicMock() + if 'connector-plugins' in url: + response.json.return_value = [] + return response + expand = kwargs.get('params', {}).get('expand') + response.json.return_value = status_only if expand == 'status' else info_only + return response + + _, http = run_connect_check( + get_side_effect=get, + instance_extra={'kafka_connect_url': CONFLUENT_CLOUD_URL}, + ) + + aggregator.assert_metric('kafka.connector.count', value=1) + aggregator.assert_metric('kafka.connector.task.count', value=1) + aggregator.assert_metric_has_tag('kafka.connector.task.count', 'connector_state:running') + aggregator.assert_metric('kafka.connector.task.running', value=1) + + # The info section arrives via the supplementary fetch; assert it survived the merge + # by checking the config event derived from it. + events = dsm_events(aggregator, 'connector') + assert len(events) == 1 + assert events[0]['connector'] == 'my-conn' + assert events[0]['connector_type'] == 'source' + assert events[0]['connector_state'] == 'RUNNING' + assert events[0]['config']['connector.class'] == 'io.confluent.SomeSource' + + # A supplementary /connectors fetch is issued because the combined call returns a null section. + connectors_fetches = [call for call in http.get.call_args_list if 'connector-plugins' not in call.args[0]] + assert len(connectors_fetches) == 2 + + +def test_oss_combined_response_makes_single_connectors_request(run_connect_check, aggregator): + """When the combined call returns both sections, no supplementary fetch is issued.""" + _, http = run_connect_check(connectors_response=SAMPLE_CONNECTORS_RESPONSE) + + connectors_fetches = [call for call in http.get.call_args_list if 'connector-plugins' not in call.args[0]] + assert len(connectors_fetches) == 1 + + +def test_combined_expand_list_ignored_falls_back_to_single_expand(run_connect_check, aggregator): + """A combined ``expand`` list is serialized as repeated query params, which Confluent Cloud's + expansions endpoint doesn't honor — it returns the plain connector-name list instead of the + expanded dict. The collector must retry with a single expand value rather than treating that + response as an unsupported (pre-2.3) worker. + """ + name_list = ['my-conn'] + info_only = {'my-conn': {'info': {'type': 'source', 'config': {'connector.class': 'io.confluent.SomeSource'}}}} + status_only = { + 'my-conn': { + 'status': {'connector': {'state': 'RUNNING'}, 'tasks': [{'id': 0, 'state': 'RUNNING'}], 'type': 'source'} + } + } + + def get(url, **kwargs): + response = mock.MagicMock() + if 'connector-plugins' in url: + response.json.return_value = [] + return response + expand = kwargs.get('params', {}).get('expand') + if expand == 'status': + response.json.return_value = status_only + elif expand == 'info': + response.json.return_value = info_only + else: + response.json.return_value = name_list + return response + + _, http = run_connect_check( + get_side_effect=get, + instance_extra={'kafka_connect_url': CONFLUENT_CLOUD_URL}, + ) + + aggregator.assert_metric('kafka.connector.count', value=1) + aggregator.assert_metric('kafka.connector.task.count', value=1) + aggregator.assert_metric_has_tag('kafka.connector.task.count', 'connector_state:running') + + events = dsm_events(aggregator, 'connector') + assert len(events) == 1 + assert events[0]['connector'] == 'my-conn' + assert events[0]['config']['connector.class'] == 'io.confluent.SomeSource' + + connectors_fetches = [call for call in http.get.call_args_list if 'connector-plugins' not in call.args[0]] + assert len(connectors_fetches) == 3 From d23a7d9a5041e583c5ea268a26aecd6d5311b22d Mon Sep 17 00:00:00 2001 From: Piotr WOLSKI Date: Fri, 3 Jul 2026 14:28:58 +0200 Subject: [PATCH 4/6] Don't abort kafka_consumer when one partition's high-watermark lookup fails (#24263) * Don't abort kafka_consumer when one partition's high-watermark lookup fails The high-watermark offset collection issues a single batched offsets_for_times call for all topic-partitions. If any partition cannot be resolved at all (e.g. a leaderless/offline RF-0 or mid-deletion topic), librdkafka raises a KafkaException (UNKNOWN_TOPIC_OR_PART) for the entire batch call, which propagates up and aborts the whole check so no metrics are collected. Fall back to per-partition lookups when the batched call fails, skipping only the partitions that still raise, so healthy partitions are still collected and the check is not aborted. Co-Authored-By: Claude Opus 4.8 (1M context) * Add changelog entry for PR #24263 Co-Authored-By: Claude Opus 4.8 (1M context) * Fetch high-watermark offsets via AdminClient.list_offsets Replace the resilient-but-clunky offsets_for_times approach with AdminClient.list_offsets, which returns a future per partition. Errored partitions (e.g. leaderless/offline topics that raise UNKNOWN_TOPIC_OR_PART) are skipped individually, so healthy partitions are still collected without a wholesale raise or a per-partition retry loop. list_offsets is also the purpose-built API for latest/earliest offsets, unlike offsets_for_times with sentinel timestamp values. Co-Authored-By: Claude Opus 4.8 (1M context) * Apply review fixes: degrade highwater offset collection gracefully Mirror cluster_metadata.fetch_earliest_offsets in get_partition_offsets: broaden the per-future handler to catch any Exception so one bad partition does not abort the loop, and wrap the outer list_offsets call so a request/broker-level failure logs a warning and returns [] instead of aborting the whole highwater collection. Strengthen unit tests: assert list_offsets is called with isolation_level=READ_UNCOMMITTED and the request timeout, cover the non-Kafka per-partition error and request-level failure paths, and add an empty-partitions test that issues no request. Co-Authored-By: Claude Opus 4.8 (1M context) * kafka_consumer: shorten get_partition_offsets docstring Co-Authored-By: Claude Opus 4.8 (1M context) * Re-raise on full-request list_offsets failures in get_partition_offsets A request-level failure was being swallowed into an empty result, so highwater offsets silently defaulted to 0 for every partition and corrupted partition.size/topic.size metrics instead of skipping them. Only individual partition lookup failures should be tolerated; a full-request failure should abort highwater collection like before. Co-Authored-By: Claude Sonnet 5 --------- Co-authored-by: Claude Opus 4.8 (1M context) --- kafka_consumer/changelog.d/24263.fixed | 1 + .../datadog_checks/kafka_consumer/client.py | 45 ++++--- .../kafka_consumer/kafka_consumer.py | 2 +- kafka_consumer/tests/test_cluster_metadata.py | 6 +- kafka_consumer/tests/test_unit.py | 126 ++++++++++++++++-- 5 files changed, 148 insertions(+), 32 deletions(-) create mode 100644 kafka_consumer/changelog.d/24263.fixed diff --git a/kafka_consumer/changelog.d/24263.fixed b/kafka_consumer/changelog.d/24263.fixed new file mode 100644 index 0000000000000..f752742ce3afb --- /dev/null +++ b/kafka_consumer/changelog.d/24263.fixed @@ -0,0 +1 @@ +Continue collecting high-watermark offsets for healthy partitions when an individual partition's offset lookup fails, instead of aborting the check. diff --git a/kafka_consumer/datadog_checks/kafka_consumer/client.py b/kafka_consumer/datadog_checks/kafka_consumer/client.py index d26c81beed385..f4a57d8e9dee0 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/client.py +++ b/kafka_consumer/datadog_checks/kafka_consumer/client.py @@ -3,8 +3,8 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from concurrent.futures import as_completed -from confluent_kafka import Consumer, ConsumerGroupTopicPartitions, KafkaException, TopicPartition -from confluent_kafka.admin import AdminClient +from confluent_kafka import Consumer, ConsumerGroupTopicPartitions, IsolationLevel, KafkaException, TopicPartition +from confluent_kafka.admin import AdminClient, OffsetSpec # AWS MSK IAM authentication support try: @@ -147,25 +147,30 @@ def consumer_get_cluster_id_and_list_topics(self, consumer_group): return "", [] return (cluster_id, [(name, list(metadata.partitions)) for name, metadata in cluster_metadata.topics.items()]) - def consumer_offsets_for_times(self, partitions, offset=-1): - topicpartitions_for_querying = [ - # -1: latest; 0: earliest (timestamp 0) - TopicPartition(topic=topic, partition=partition, offset=offset) - for topic, partition in partitions - ] + def get_partition_offsets(self, partitions, offset=-1): + """Return (topic, partition, offset) tuples, skipping partitions that can't be queried. + + A request-level failure (the batched list_offsets call itself raising, as opposed to an + individual partition's future) is not swallowed here: it propagates so the caller aborts + highwater collection instead of silently treating every partition as missing. + """ + offset_spec = OffsetSpec.earliest() if offset == 0 else OffsetSpec.latest() + request = {TopicPartition(topic=topic, partition=partition): offset_spec for topic, partition in partitions} + if not request: + return [] + + futures = self.kafka_client.list_offsets( + request, + isolation_level=IsolationLevel.READ_UNCOMMITTED, + request_timeout=self.config._request_timeout, + ) + results = [] - for tp in self._consumer.offsets_for_times( - partitions=topicpartitions_for_querying, timeout=self.config._request_timeout - ): - if tp.error: - self.log.debug( - "Failed to get offset for topic %s partition %s: %s", - tp.topic, - tp.partition, - tp.error, - ) - continue - results.append((tp.topic, tp.partition, tp.offset)) + for tp, future in futures.items(): + try: + results.append((tp.topic, tp.partition, future.result().offset)) + except Exception as e: + self.log.debug("Skipping offsets for %s/%s: %s", tp.topic, tp.partition, e) return results def _list_topics(self): diff --git a/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py b/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py index 9cede0b9d5fc1..bb003b6567607 100644 --- a/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py +++ b/kafka_consumer/datadog_checks/kafka_consumer/kafka_consumer.py @@ -482,7 +482,7 @@ def get_highwater_offsets(self, partitions=None): self.log.debug('Querying %s highwater offsets', len(topic_partitions_to_check)) result = {} - for topic, partition, offset in self.client.consumer_offsets_for_times( + for topic, partition, offset in self.client.get_partition_offsets( partitions=topic_partitions_to_check, offset=HIGH_WATERMARK ): result[(topic, partition)] = offset diff --git a/kafka_consumer/tests/test_cluster_metadata.py b/kafka_consumer/tests/test_cluster_metadata.py index 085cc06865980..e4b5e3b1c3dfe 100644 --- a/kafka_consumer/tests/test_cluster_metadata.py +++ b/kafka_consumer/tests/test_cluster_metadata.py @@ -166,7 +166,7 @@ def mock_offsets_for_times(partitions, offset=-1): else: return [(topic, partition, 10 if partition == 0 else 20) for topic, partition in partitions] - client.consumer_offsets_for_times = mock_offsets_for_times + client.get_partition_offsets = mock_offsets_for_times def mock_list_offsets(requests, **_kwargs): result = {} @@ -656,7 +656,7 @@ def mock_offsets(partitions, offset=-1): else: return [(topic, partition, 10 if partition == 0 else 20) for topic, partition in partitions] - mock_kafka_client.consumer_offsets_for_times = mock_offsets + mock_kafka_client.get_partition_offsets = mock_offsets # Mock cache with previous offsets baseline_cache = { @@ -721,7 +721,7 @@ def mock_offsets_run2(partitions, offset=-1): else: return [(topic, partition, 10 if partition == 0 else 20) for topic, partition in partitions] - mock_kafka_client.consumer_offsets_for_times = mock_offsets_run2 + mock_kafka_client.get_partition_offsets = mock_offsets_run2 prev_cache = json.dumps(baseline_cache) kafka_consumer_check.read_persistent_cache = mock.Mock(return_value=prev_cache) diff --git a/kafka_consumer/tests/test_unit.py b/kafka_consumer/tests/test_unit.py index 96082a8a51e24..ce025afe1aa30 100644 --- a/kafka_consumer/tests/test_unit.py +++ b/kafka_consumer/tests/test_unit.py @@ -14,7 +14,7 @@ pytestmark = [pytest.mark.unit] -def fake_consumer_offsets_for_times(partitions, offset=-1): +def fake_get_partition_offsets(partitions, offset=-1): """In our testing environment the offset is 80 for all partitions and topics.""" return [(t, p, 80) for t, p in partitions] @@ -41,7 +41,7 @@ def seed_mock_client(cluster_id="cluster_id"): ('__consumer_offsets', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ], ) - client.consumer_offsets_for_times = fake_consumer_offsets_for_times + client.get_partition_offsets = fake_get_partition_offsets return client @@ -500,7 +500,7 @@ def test_check_clears_cache_on_partial_reset(kafka_instance, check, dd_run_check mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 30)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 100)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 100)] kafka_consumer_check.client = mock_client # Cache has entries below (5, 50) and above (200) the new highwater of 100 — all must be cleared. @@ -585,7 +585,7 @@ def test_check_compacts_timestamps_and_preserves_lag_accuracy( mock_client = seed_mock_client() mock_client.get_partitions_for_topic.return_value = [0] mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, consumer_offset)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, highwater_offset)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, highwater_offset)] kafka_consumer_check.client = mock_client kafka_consumer_check.read_persistent_cache = mock.Mock(return_value=json.dumps(initial_cache)) @@ -609,7 +609,7 @@ def test_check_prunes_timestamps_below_earliest_consumer_offset(kafka_instance, mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 25)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client # Pre-seed the cache with 3 entries. Adding the new highwater at 40 fills the 4-entry @@ -747,7 +747,7 @@ def test_check_prunes_floor_uses_minimum_offset_across_groups(kafka_instance, ch ("group1", [("topic1", 0, 25)]), ("group2", [("topic1", 0, 5)]), ] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client # With floor=5 (correct min), nothing below 5 exists, so no pruning; VW keeps {10, 40}. @@ -776,7 +776,7 @@ def test_check_prunes_anchor_at_floor_boundary(kafka_instance, check, dd_run_che mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 30)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client initial_cache = {"topic1_0": {"10": 1.0, "20": 2.0, "30": 3.0}} @@ -802,7 +802,7 @@ def test_check_keeps_sole_entry_below_floor_as_anchor(kafka_instance, check, dd_ mock_client = seed_mock_client() mock_client.list_consumer_group_offsets.return_value = [("consumer_group1", [("topic1", 0, 25)])] - mock_client.consumer_offsets_for_times = lambda partitions, offset=-1: [("topic1", 0, 40)] + mock_client.get_partition_offsets = lambda partitions, offset=-1: [("topic1", 0, 40)] kafka_consumer_check.client = mock_client initial_cache = {"topic1_0": {"20": 2.0, "30": 3.0}} @@ -1054,3 +1054,113 @@ def test_connection_error_sink_failure_does_not_mask_broker_error(check, kafka_i kafka_consumer_check.event_platform_event = mock.Mock(side_effect=Exception('intake unavailable')) with pytest.raises(Exception, match="Unable to connect to the AdminClient"): dd_run_check(kafka_consumer_check) + + +def _offset_future(offset): + """Build a list_offsets future whose result() returns an object with the given offset.""" + future = mock.MagicMock() + future.result.return_value = mock.MagicMock(offset=offset) + return future + + +def _raising_future(exc): + """Build a list_offsets future whose result() raises the given exception.""" + future = mock.MagicMock() + future.result.side_effect = exc + return future + + +def test_get_partition_offsets_skips_unqueryable_partitions(): + """A partition whose list_offsets future raises is skipped; healthy partitions are still returned.""" + from confluent_kafka import KafkaException, TopicPartition + + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + + futures = { + TopicPartition(topic="healthy_topic", partition=0): _offset_future(100), + TopicPartition(topic="bad_topic", partition=0): _raising_future(KafkaException("UNKNOWN_TOPIC_OR_PART")), + } + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.return_value = futures + + results = client.get_partition_offsets([("healthy_topic", 0), ("bad_topic", 0)]) + + # (a) no exception escaped, (b) the healthy partition's offset is returned, + # (c) the unqueryable partition is skipped. + assert results == [("healthy_topic", 0, 100)] + + +def test_get_partition_offsets_skips_partition_on_non_kafka_error(): + """A non-Kafka error on one partition's future is skipped, not propagated, so the loop survives.""" + from confluent_kafka import TopicPartition + + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + + futures = { + TopicPartition(topic="healthy_topic", partition=0): _offset_future(100), + TopicPartition(topic="bad_topic", partition=0): _raising_future(RuntimeError("unexpected")), + } + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.return_value = futures + + results = client.get_partition_offsets([("healthy_topic", 0), ("bad_topic", 0)]) + + assert results == [("healthy_topic", 0, 100)] + + +def test_get_partition_offsets_raises_when_list_offsets_request_fails(): + """A request/broker-level list_offsets failure propagates, aborting highwater collection.""" + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.side_effect = RuntimeError("connection dropped") + + with pytest.raises(RuntimeError): + client.get_partition_offsets([("topic_a", 0)]) + + +def test_get_partition_offsets_empty_partitions_returns_empty_without_request(): + """No partitions means no list_offsets request is issued and an empty result is returned.""" + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + client._kafka_client = mock.MagicMock() + + results = client.get_partition_offsets([]) + + assert results == [] + assert client._kafka_client.list_offsets.call_count == 0 + + +def test_get_partition_offsets_returns_all_healthy_partitions(): + """When every list_offsets future succeeds, all partition offsets are returned.""" + from confluent_kafka import IsolationLevel, TopicPartition + + config = mock.MagicMock() + config._request_timeout = 5 + + client = KafkaClient(config, logging.getLogger(__name__)) + + futures = { + TopicPartition(topic="topic_a", partition=0): _offset_future(42), + TopicPartition(topic="topic_b", partition=1): _offset_future(7), + } + client._kafka_client = mock.MagicMock() + client._kafka_client.list_offsets.return_value = futures + + results = client.get_partition_offsets([("topic_a", 0), ("topic_b", 1)]) + + assert sorted(results) == [("topic_a", 0, 42), ("topic_b", 1, 7)] + assert client._kafka_client.list_offsets.call_count == 1 + # READ_UNCOMMITTED is load-bearing: READ_COMMITTED would return the LSO, not the true high watermark. + assert client._kafka_client.list_offsets.call_args.kwargs["isolation_level"] == IsolationLevel.READ_UNCOMMITTED + assert client._kafka_client.list_offsets.call_args.kwargs["request_timeout"] == 5 From f80cdd009b3190bec301638805d56173563de4f7 Mon Sep 17 00:00:00 2001 From: Juanpe Araque Date: Fri, 3 Jul 2026 14:42:05 +0200 Subject: [PATCH 5/6] [AI-6917] Smarter ddev release branch tag command (#23860) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Extend `release branch tag` with `--release`, `--ref`, `--rc N`, and `--yes` The command previously required the user to be on the release branch and only let them pick between `--rc` and `--final`. The new flags make it possible to tag a release branch from anywhere, pin or auto-suggest the RC number, and tag an arbitrary commit instead of the branch tip: - `--release/-r ` selects the target release branch; if not on it, a worktree is created at `.worktrees/release-tag/` from `origin/` and torn down on success. - `--ref ` tags the given commit (must be an ancestor of `origin/`) instead of the branch tip. - `--rc` accepts an optional value: `--rc` alone auto-suggests, `--rc N` pins. Warns when N skips ahead of the next available RC. - `--yes/-y` skips all yes/no confirms. * Add changelog entry * Apply round 1 review feedback and fix mypy lint - `GitRepository.tag()` now uses its `message` parameter instead of the tag name (pre-existing bug: `--message` was always set to `value`). - Compute `current_branch` once and thread it into `_resolve_target_branch` to avoid the duplicate `git rev-parse` subprocess call. - Move worktree setup inside the try/finally so any failure between worktree creation and teardown still triggers the inspection warning. - Narrow the OSError catch to just `git.tag` / `git.push` so other failures surface with their own attributable messages. - `_branch_exists_on_origin` no longer swallows OSError as "branch missing" — split into `_ensure_branch_on_origin` that distinguishes a real ls-remote failure from an empty result. - Confirmation prompt for `--ref` shows the resolved commit SHA, not the raw user input, so the user can verify what `rev-parse` resolved to. - Renamed `_create_or_refresh_worktree` to `_create_worktree` (it only adds) and the failure message now hints at the manual cleanup command. - Added an `exit_code` assertion to `test_no_worktree_when_already_on_target_branch`. - Input-validation helpers now raise `click.UsageError` so mypy follows the `NoReturn` semantics; updated the two affected tests to assert non-zero exit instead of exit code 1 (UsageError exits with 2). * Apply round 2 review feedback - Drop unused `app` arg from `_parse_rc_value` (it raises `click.UsageError` directly and never needed application state). - Include the underlying git error text in the `--ref` failure messages for both `rev-parse` and `merge-base --is-ancestor`. - Remove dead post-`range` guard in `_warn_on_rc_gap`. - Annotate `_check_open_prs` return as `list[PullRequest]`. - Rename changelog entry from `.added` to `.changed`: the new confirm prompt on a release branch is a behavior change for non-interactive callers that piped a single `y`. - Replace the `capture.return_value` fixture default with a side_effect callable that dispatches on the first argument, so new code paths that call `capture()` without an explicit override don't silently inherit the `ls-remote` payload. - Use a `_make_ref_dispatcher` helper in `--ref` tests so the mocked responses are keyed by subcommand instead of call order. - Extract `_worktree_subcommands(git, sub)` helper to dedupe the worktree-call assertions. * Apply round 3 review feedback - `--final` + `--rc` now raises `click.UsageError` (exit code 2) to match the other CLI-validation errors in the command. - Change the `RC_AUTO` sentinel to a private, unguessable string so `--rc auto` is no longer silently equivalent to bare `--rc`. - Merge `_resolve_tag_ref`'s two try/except blocks; the branch that picks the abort message keys off whether `commit_sha` was bound, which avoids the possibly-unbound warning some type-checkers emit. - `_warn_on_rc_gap` now uses `app.display_warning` to match the other new helpers in this PR. - Test additions: full call-shape assertions on the `worktree add` / `worktree remove` invocations, plus a new test that covers the `_create_worktree` abort path and its manual-cleanup hint. * Normalize backslashes in worktree path assertions for Windows * Extract helpers from `tag()` body to read as a recipe The command body had four logical phases interleaved with the worktree lifecycle. Each phase is now its own helper: - `_prepare_working_dir` decides between pull-in-place and worktree creation, returns `(git, working_dir, worktree_created)`. - `_warn_if_build_agent_yaml_stale` surfaces the recovery warning and returns whether the workflow dispatch is needed. - `_compute_new_tag` owns the existing-tag collection, RC resolution, and all RC validations (bounds, exists, gap, backward move). - `_confirm_and_push_tag` owns the open-PR check, the final confirm prompt, and the `git tag` / `git push` pair. The main `tag()` body shrinks from ~108 to ~35 lines and reads top-to-bottom as the steps of a tagging operation. The worktree `try/finally` stays in the main body — a future change can replace it with a context manager once worktree management is consolidated across the codebase. * Tag directly against `origin/` instead of via a worktree The previous design checked out the target branch in a worktree under `.worktrees/release-tag//` and ran the tag command from there. The worktree was only ever needed to read `.gitlab/build_agent.yaml` from the right commit, since every other step (listing tags, validating `--ref`, creating and pushing the tag) keys off ref/object names that any clone resolves the same way. Read the YAML via `git show :.gitlab/build_agent.yaml` and pass an explicit `ref` to `git tag` (`origin/` by default, the resolved commit when `--ref` is supplied). The user's local checkout and branches are never touched. Side effects: - Drops `WORKTREE_BASE`, `_create_worktree`, `_remove_worktree`, `_prepare_working_dir`, and the worktree `try/finally`. - Closes a silent-data-loss bug: `git worktree add -B ` force-resets a pre-existing local `` to the remote tip, orphaning any local-only commits on it. - Reads the YAML at the actual commit being tagged when `--ref` is supplied, rather than at the branch tip. - Removes the implicit `git pull origin ` that the no-worktree path used to do — tagging no longer mutates local branch state under any flag combination. * Apply round 4 review feedback - Move the `< 1` guard into `_parse_rc_value` so `--rc 0` is rejected via `click.UsageError` (exit code 2) like every other `--rc` parse failure, instead of via `app.abort` (exit code 1) after the prompt branch. The interactive-prompt branch still aborts on `< 1`, which is now the only path that reaches that check. - Add `git` as an explicit parameter to `_ensure_branch_on_origin` to match the other git-using helpers. - Tighten the worktree-regression guard in tests: filter both `run` and `capture` calls, since `git worktree` operations can go through either entry point. - `_make_ref_dispatcher` now delegates to `_capture_dispatch` for subcommands it doesn't explicitly handle, removing the duplicated `ls-remote` literal. --------- Co-authored-by: Alexey Pilyugin --- ddev/changelog.d/23860.changed | 1 + ddev/src/ddev/cli/release/branch/tag.py | 365 ++++++++++++++++++---- ddev/src/ddev/utils/git.py | 8 +- ddev/tests/cli/release/branch/test_tag.py | 308 +++++++++++++----- 4 files changed, 541 insertions(+), 141 deletions(-) create mode 100644 ddev/changelog.d/23860.changed diff --git a/ddev/changelog.d/23860.changed b/ddev/changelog.d/23860.changed new file mode 100644 index 0000000000000..887a4c848e64b --- /dev/null +++ b/ddev/changelog.d/23860.changed @@ -0,0 +1 @@ +`ddev release branch tag` now accepts `--release/-r`, `--ref`, `--rc N`, and `--yes/-y`, and prompts to confirm when run on a release branch without `--release`. Existing non-interactive callers that piped a single `y` need to pass `--yes` or one extra confirmation. diff --git a/ddev/src/ddev/cli/release/branch/tag.py b/ddev/src/ddev/cli/release/branch/tag.py index 1fa102623f521..11fc901410f58 100644 --- a/ddev/src/ddev/cli/release/branch/tag.py +++ b/ddev/src/ddev/cli/release/branch/tag.py @@ -8,23 +8,70 @@ from httpx import HTTPStatusError from packaging.version import Version +from ddev.utils.git import GitRepository + from .build_agent import BUILD_AGENT_YAML_PATH, find_build_agent_template_main_branch_matches from .create import BRANCH_NAME_REGEX if TYPE_CHECKING: from ddev.cli.application import Application + from ddev.utils.github import PullRequest UPDATE_BUILD_AGENT_YAML_WORKFLOW = 'update-build-agent-yaml.yml' # The dd-octo-sts policy grants PR-writing credentials only to this workflow on master. UPDATE_BUILD_AGENT_YAML_WORKFLOW_REF = 'master' +RELEASE_INPUT_REGEX = re.compile(r'^(\d+\.\d+)(\.x)?$') +# Sentinel used by `--rc` when the user provides the flag without a value. +# Chosen so that `--rc auto` (which users could plausibly type) is rejected by `_parse_rc_value` +# rather than silently treated as bare `--rc`. +RC_AUTO = '__rc_auto_sentinel__' @click.command @click.option( - '--final/--rc', + '--release', + '-r', + 'release', + default=None, + help=( + 'Release to tag, e.g. `7.56` or `7.56.x`. ' + 'If omitted, the current branch must be a release branch and you will be prompted to confirm.' + ), +) +@click.option( + '--ref', + 'ref', + default=None, + help=( + 'Commit (SHA, tag, or ref) to tag instead of the tip of the release branch. ' + 'Must resolve to a commit that is an ancestor of `origin/`.' + ), +) +@click.option( + '--final', + 'final', + is_flag=True, default=False, - show_default=True, - help="Whether we're tagging the final release or a release candidate (rc).", + help='Tag a final release. Mutually exclusive with `--rc`.', +) +@click.option( + '--rc', + 'rc', + is_flag=False, + flag_value=RC_AUTO, + default=None, + help=( + 'Tag a release candidate (default). Pass `--rc` alone to auto-suggest the next RC number, ' + 'or `--rc N` to pin the RC number explicitly. Mutually exclusive with `--final`.' + ), +) +@click.option( + '--yes', + '-y', + 'yes', + is_flag=True, + default=False, + help='Skip yes/no confirmation prompts and assume the user always answered yes.', ) @click.option( '--skip-open-pr-check', @@ -34,35 +81,97 @@ help='Skip checking GitHub for open PRs targeting this release branch before tagging.', ) @click.pass_obj -def tag(app, final: bool, skip_open_pr_check: bool): +def tag( + app: Application, + release: str | None, + ref: str | None, + final: bool, + rc: str | None, + yes: bool, + skip_open_pr_check: bool, +) -> None: """ - Tag the release branch either as release candidate or final release. + Tag a release branch with a release-candidate or final-release tag. + + The command always operates against `origin/` after fetching: it does not + check out or modify any local branch. The release branch is determined as follows: + + \b + - If `--release` is given (e.g. `7.56` or `7.56.x`), that branch is used. + - If `--release` is omitted and you are on a release branch, you are prompted to confirm + tagging that branch (use `--yes` to skip the prompt). + - If `--release` is omitted and you are not on a release branch, the command aborts and + asks you to pass `--release`. + + Tag-type selection: + + \b + - `--rc` (default) tags a release candidate. Pass `--rc N` to pin the RC number; pass + `--rc` alone to auto-suggest the next available RC. The command warns (but does not + abort) if the requested number leaves a gap in the RC sequence. + - `--final` tags a final release; mutually exclusive with `--rc`. + + Other options: + + \b + - `--ref ` tags an arbitrary commit instead of the branch tip. The commit must be + an ancestor of `origin/`. Useful when later commits should not be + included in the tag. + - `--yes/-y` skips all yes/no confirmations (target-branch, backward-RC, final tag). + - `--skip-open-pr-check` skips the GitHub query for open PRs targeting the branch. """ - branch_name = app.repo.git.current_branch() - release_branch = BRANCH_NAME_REGEX.match(branch_name) - if release_branch is None: - app.abort( - f'Invalid branch name: {branch_name}. Branch name must match the pattern {BRANCH_NAME_REGEX.pattern}.' - ) + if final and rc is not None: + raise click.UsageError('`--final` and `--rc` are mutually exclusive.') + is_rc = not final + pinned_rc = _parse_rc_value(rc) if is_rc else None - click.echo(app.repo.git.pull(branch_name)) - click.echo(app.repo.git.fetch_tags()) + current_branch = app.repo.git.current_branch() + target_branch = _resolve_target_branch(app, release, yes, current_branch) + + git = app.repo.git + app.display_waiting('Fetching from origin...') + git.fetch_tags() + _ensure_branch_on_origin(app, git, target_branch) + + tag_ref = _resolve_tag_ref(app, git, target_branch, ref) + effective_ref = tag_ref if tag_ref is not None else f'origin/{target_branch}' + + build_agent_yaml_needs_update = _warn_if_build_agent_yaml_stale(app, git, effective_ref) + new_tag = _compute_new_tag(app, git, target_branch, is_rc, pinned_rc, yes) + _confirm_and_push_tag(app, git, target_branch, new_tag, tag_ref, effective_ref, yes, skip_open_pr_check) - build_agent_yaml_needs_update = _build_agent_yaml_points_to_main() - # Recovery path for release branches cut before build_agent.yaml was updated. if build_agent_yaml_needs_update: - app.display_warning( - "`.gitlab/build_agent.yaml` still points to `main`.\n" - "The update PR may not have been created or merged yet.\n" - "Will trigger the workflow after the tag is pushed.\n" - "Tagging will continue." - ) + _trigger_build_agent_yaml_update_workflow(app, target_branch) + + +def _warn_if_build_agent_yaml_stale(app: Application, git: GitRepository, ref: str) -> bool: + """Warn (and return True) if `.gitlab/build_agent.yaml` at `ref` still points to `main`.""" + if not _build_agent_yaml_points_to_main(git, ref): + return False + # Recovery path for release branches cut before build_agent.yaml was updated. + app.display_warning( + "`.gitlab/build_agent.yaml` still points to `main`.\n" + "The update PR may not have been created or merged yet.\n" + "Will trigger the workflow after the tag is pushed.\n" + "Tagging will continue." + ) + return True + - major_minor_version = branch_name.replace('.x', '') +def _compute_new_tag( + app: Application, + git: GitRepository, + target_branch: str, + is_rc: bool, + pinned_rc: int | None, + yes: bool, +) -> str: + """Compute the new tag string, validating RC bounds, existence, gaps, and backward moves.""" + major_minor_version = target_branch.replace('.x', '') this_release_tags = sorted( ( Version(t) - for t in set(app.repo.git.tags(glob_pattern=major_minor_version + '.*')) + for t in set(git.tags(glob_pattern=major_minor_version + '.*')) # We take 'major.minor.x' as the branch name pattern and replace 'x' with 'patch-rc.number'. # We make the RC component optional so that final tags also match our filter. if re.match(BRANCH_NAME_REGEX.pattern.replace('x', r'\d+(\-rc\.\d+)?'), t) @@ -73,61 +182,181 @@ def tag(app, final: bool, skip_open_pr_check: bool): last_tag_was_final = last_rc is None new_patch = last_patch + 1 if last_tag_was_final else last_patch new_tag = f'{major_minor_version}.{new_patch}' - if not final: - new_rc_guess = 1 if last_tag_was_final else last_rc + 1 + if not is_rc: + return new_tag + + new_rc_guess = 1 if last_tag_was_final else last_rc + 1 + if pinned_rc is not None: + next_rc = pinned_rc + elif yes: + next_rc = new_rc_guess + click.echo(f'Using auto-suggested RC number: {next_rc}') + else: next_rc = click.prompt( - 'What RC number are we tagging? (hit ENTER to accept suggestion)', type=int, default=new_rc_guess + 'What RC number are we tagging? (hit ENTER to accept suggestion)', + type=int, + default=new_rc_guess, ) - if next_rc < 1: - app.abort('RC number must be at least 1.') - new_tag += f'-rc.{next_rc}' - if Version(new_tag) in this_release_tags: - app.abort(f'Tag {new_tag} already exists. Switch to git to overwrite it.') - if not last_tag_was_final and next_rc < last_rc: - click.secho('!!! WARNING !!!') - if not click.confirm( - f'The latest RC is {last_rc}. ' - 'You are about to go back in time by creating an RC with a number less than that. Are you sure? [y/N]' - ): - app.abort('Did not get confirmation, aborting. Did not create or push the tag.') - - prs = [] - if not skip_open_pr_check: - httpx_logger = logging.getLogger('httpx') - previous_level = httpx_logger.level - httpx_logger.setLevel(logging.WARNING) - try: - prs = app.github.list_open_pull_requests_targeting_base(branch_name) - except Exception as e: - click.secho(f'Warning: unable to check for open PRs: {e}', fg='yellow') - finally: - httpx_logger.setLevel(previous_level) - - if prs: - click.secho('!!! WARNING !!!', fg='yellow') - click.secho(f'Found {len(prs)} open PR(s) targeting base branch {branch_name}:', fg='yellow') - for pr in prs[:20]: - click.secho(f' - #{pr.number} {pr.title} ({pr.html_url})', fg='yellow') - if len(prs) > 20: - click.secho(f' ... and {len(prs) - 20} more', fg='yellow') + if next_rc < 1: + # Only reachable for the interactive prompt — `--rc N` is validated up-front in + # `_parse_rc_value`. + app.abort('RC number must be at least 1.') + new_tag += f'-rc.{next_rc}' + if Version(new_tag) in this_release_tags: + app.abort(f'Tag {new_tag} already exists. Switch to git to overwrite it.') + _warn_on_rc_gap(app, next_rc, new_rc_guess, target_branch) + if not last_tag_was_final and next_rc < last_rc: + click.secho('!!! WARNING !!!') + if not _confirm( + yes, + f'The latest RC is {last_rc}. ' + 'You are about to go back in time by creating an RC with a number less than that. Are you sure? ' + '[y/N]', + ): + app.abort('Did not get confirmation, aborting. Did not create or push the tag.') + return new_tag + + +def _confirm_and_push_tag( + app: Application, + git: GitRepository, + target_branch: str, + new_tag: str, + tag_ref: str | None, + effective_ref: str, + yes: bool, + skip_open_pr_check: bool, +) -> None: + """Surface open-PR warnings, get final confirmation, then create and push the tag. + + `tag_ref` is the user-resolved commit when `--ref` was explicitly provided (used for the + "at ?" prompt suffix). `effective_ref` is what `git tag` actually keys off — either + that same commit, or `origin/` when `--ref` was not supplied. + """ + prs = _check_open_prs(app, target_branch, skip_open_pr_check) prompt = f'Create and push this tag: {new_tag}?' if prs: - prompt = f'Open PRs found targeting {branch_name}. Create and push this tag anyway: {new_tag}?' + prompt = f'Open PRs found targeting {target_branch}. Create and push this tag anyway: {new_tag}?' + if tag_ref is not None: + prompt = prompt.rstrip('?') + f' at {tag_ref}?' - if not click.confirm(prompt): + if not _confirm(yes, prompt): app.abort('Did not get confirmation, aborting. Did not create or push the tag.') - click.echo(app.repo.git.tag(new_tag, message=new_tag)) - click.echo(app.repo.git.push(new_tag)) - if build_agent_yaml_needs_update: - _trigger_build_agent_yaml_update_workflow(app, branch_name) + try: + click.echo(git.tag(new_tag, message=new_tag, ref=effective_ref)) + click.echo(git.push(new_tag)) + except OSError as e: + app.abort(f'Failed to create or push tag `{new_tag}`: {e}') + + +def _parse_rc_value(rc: str | None) -> int | None: + if rc is None or rc == RC_AUTO: + return None + try: + value = int(rc) + except ValueError as e: + raise click.UsageError(f'`--rc` value must be a positive integer, got `{rc}`.') from e + if value < 1: + raise click.UsageError(f'`--rc` value must be a positive integer, got `{rc}`.') + return value + + +def _resolve_target_branch(app: Application, release: str | None, yes: bool, current_branch: str) -> str: + if release is not None: + match = RELEASE_INPUT_REGEX.match(release) + if match is None: + raise click.UsageError(f'Invalid `--release` value: `{release}`. Must look like `7.56` or `7.56.x`.') + return f'{match.group(1)}.x' + + if BRANCH_NAME_REGEX.match(current_branch) is None: + app.abort( + f'Current branch `{current_branch}` is not a release branch. ' + f'Pass `--release ` to choose the release branch to tag.' + ) + + if not _confirm(yes, f'You are on release branch `{current_branch}`. Tag this release?'): + app.abort('Did not get confirmation, aborting. Did not create or push the tag.') + return current_branch + +def _ensure_branch_on_origin(app: Application, git: GitRepository, branch: str) -> None: + try: + output = git.capture('ls-remote', '--heads', 'origin', branch) + except OSError as e: + app.abort(f'Failed to query `origin` for branch `{branch}`: {e}') + if not any(line.strip() for line in output.splitlines()): + app.abort(f'Release branch `{branch}` does not exist on `origin`.') + + +def _resolve_tag_ref(app: Application, git: GitRepository, target_branch: str, ref: str | None) -> str | None: + if ref is None: + return None + commit_sha: str | None = None + try: + commit_sha = git.capture('rev-parse', '--verify', f'{ref}^{{commit}}').strip() + git.capture('merge-base', '--is-ancestor', commit_sha, f'origin/{target_branch}') + except OSError as e: + if commit_sha is None: + app.abort(f'`--ref` value `{ref}` does not resolve to a commit: {e}') + app.abort( + f'`--ref` value `{ref}` (resolved to `{commit_sha}`) is not an ancestor of ' + f'`origin/{target_branch}` (or `git merge-base` itself failed): {e}' + ) + return commit_sha + + +def _warn_on_rc_gap(app: Application, next_rc: int, expected_rc: int, branch: str) -> None: + if next_rc <= expected_rc: + return + missing = list(range(expected_rc, next_rc)) + missing_list = ', '.join(str(n) for n in missing) + suggested_rc = missing[0] + app.display_warning( + f'Requested RC {next_rc} skips ahead of the next available RC ({expected_rc}). ' + f'Missing RC number(s): {missing_list}.\n' + f'To fill the gap later, run: ' + f'ddev release branch tag --release {branch} --rc {suggested_rc} --ref ' + ) -def _build_agent_yaml_points_to_main() -> bool: - from ddev.utils.fs import Path - path = Path(BUILD_AGENT_YAML_PATH) - return path.exists() and bool(find_build_agent_template_main_branch_matches(path.read_text())) +def _check_open_prs(app: Application, target_branch: str, skip_open_pr_check: bool) -> list[PullRequest]: + if skip_open_pr_check: + return [] + httpx_logger = logging.getLogger('httpx') + previous_level = httpx_logger.level + httpx_logger.setLevel(logging.WARNING) + try: + prs = app.github.list_open_pull_requests_targeting_base(target_branch) + except Exception as e: + click.secho(f'Warning: unable to check for open PRs: {e}', fg='yellow') + return [] + finally: + httpx_logger.setLevel(previous_level) + + if prs: + click.secho('!!! WARNING !!!', fg='yellow') + click.secho(f'Found {len(prs)} open PR(s) targeting base branch {target_branch}:', fg='yellow') + for pr in prs[:20]: + click.secho(f' - #{pr.number} {pr.title} ({pr.html_url})', fg='yellow') + if len(prs) > 20: + click.secho(f' ... and {len(prs) - 20} more', fg='yellow') + return prs + + +def _confirm(yes: bool, prompt: str) -> bool: + if yes: + click.echo(f'{prompt} [auto-yes]') + return True + return click.confirm(prompt) + + +def _build_agent_yaml_points_to_main(git: GitRepository, ref: str) -> bool: + try: + content = git.show_file(BUILD_AGENT_YAML_PATH, ref) + except OSError: + return False + return bool(find_build_agent_template_main_branch_matches(content)) def _trigger_build_agent_yaml_update_workflow(app: Application, branch_name: str) -> None: diff --git a/ddev/src/ddev/utils/git.py b/ddev/src/ddev/utils/git.py index 2b5a9f6865c9c..ecd4262af09ec 100644 --- a/ddev/src/ddev/utils/git.py +++ b/ddev/src/ddev/utils/git.py @@ -124,13 +124,15 @@ def pull(self, ref): def push(self, ref): return self.capture('push', 'origin', ref) - def tag(self, value, message=None): + def tag(self, value, message=None, ref=None): """ - Create a tag with an optional message. + Create a tag with an optional message, optionally at a specific commit/ref. """ cmd = ['tag', value] if message is not None: - cmd.extend(['--message', value]) + cmd.extend(['--message', message]) + if ref is not None: + cmd.append(ref) return self.capture(*cmd) def tags(self, glob_pattern=None) -> list[str]: diff --git a/ddev/tests/cli/release/branch/test_tag.py b/ddev/tests/cli/release/branch/test_tag.py index da8da18601b1e..512e350444055 100644 --- a/ddev/tests/cli/release/branch/test_tag.py +++ b/ddev/tests/cli/release/branch/test_tag.py @@ -8,6 +8,8 @@ from ddev.utils.git import GitRepository +ORIGIN_REF = 'origin/7.56.x' + def test_tag_check_open_prs_warns_and_allows_continue(ddev, git, mocker, config_file): config_file.model.github = {'user': 'test-user', 'token': 'test-token'} @@ -22,11 +24,11 @@ def test_tag_check_open_prs_warns_and_allows_continue(ddev, git, mocker, config_ return_value=[mock_pr], ) - result = ddev('release', 'branch', 'tag', '--final', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') assert result.exit_code == 0, result.output assert git.method_calls[-2:] == [ - c.tag('7.56.0', message='7.56.0'), + c.tag('7.56.0', message='7.56.0', ref=ORIGIN_REF), c.push('7.56.0'), ] assert 'Found 1 open PR(s) targeting base branch 7.56.x' in result.output @@ -41,7 +43,7 @@ def test_tag_skip_open_pr_check(ddev, git, mocker, config_file): list_prs = mocker.patch('ddev.utils.github.GitHubManager.list_open_pull_requests_targeting_base') - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='y\n') _assert_tag_pushed(git, result, '7.56.0') list_prs.assert_not_called() @@ -56,7 +58,7 @@ def test_tag_github_api_error_degrades_gracefully(ddev, git, mocker, config_file side_effect=Exception('API error'), ) - result = ddev('release', 'branch', 'tag', '--final', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') _assert_tag_pushed(git, result, '7.56.0') assert 'unable to check for open PRs' in result.output @@ -64,12 +66,14 @@ def test_tag_github_api_error_degrades_gracefully(ddev, git, mocker, config_file NO_CONFIRMATION_SO_ABORT = 'Did not get confirmation, aborting. Did not create or push the tag.' RC_NUMBER_PROMPT = 'What RC number are we tagging? (hit ENTER to accept suggestion) [{}]' +TAG_THIS_RELEASE_PROMPT = 'You are on release branch `7.56.x`. Tag this release?' +LS_REMOTE_OK = 'abc123\trefs/heads/7.56.x\n' EXAMPLE_TAGS = [ '7.56.0-rc.1', # Random RC tag from DBM. We should make sure we ignore it. '7.56.0-rc.1-dbm-agent-jobs', - # Icluding RC 11 is interesting because it makes sure we we parse the versions before we sort them. + # Including RC 11 is interesting because it makes sure we parse the versions before we sort them. # The naive sort will think RC 11 is earlier than RC 2. '7.56.0-rc.11', '7.56.0-rc.2', @@ -81,12 +85,28 @@ def test_tag_github_api_error_degrades_gracefully(ddev, git, mocker, config_file ] +def _capture_dispatch(*args): + """Dispatch `git.capture(...)` mock calls by their first argument. + + Tests can override individual subcommands by replacing `git.capture.side_effect` outright. + Defaulting to a per-subcommand mapping (instead of one global return_value) keeps unrelated + tests from silently masking new code paths. + """ + if not args: + return '' + sub = args[0] + if sub == 'ls-remote': + return LS_REMOTE_OK + return '' + + @pytest.fixture def basic_git(mocker): mock_git = mocker.create_autospec(GitRepository) # We're patching the creation of the GitRepository class. # That's why we need a function that returns the mock. mocker.patch('ddev.repo.core.GitRepository', lambda _: mock_git) + mock_git.capture.side_effect = _capture_dispatch return mock_git @@ -95,27 +115,22 @@ def git(basic_git, mocker): mocker.patch('ddev.cli.release.branch.tag._build_agent_yaml_points_to_main', return_value=False) basic_git.current_branch.return_value = '7.56.x' basic_git.tags.return_value = EXAMPLE_TAGS[:] - yield basic_git - assert basic_git.method_calls[:4] == [ - c.current_branch(), - c.pull('7.56.x'), - c.fetch_tags(), - c.tags(glob_pattern='7.56.*'), - ] + return basic_git -def _assert_tag_pushed(git, result, tag): +def _assert_tag_pushed(git, result, tag, ref=ORIGIN_REF): assert result.exit_code == 0, result.output assert git.method_calls[-2:] == [ - c.tag(tag, message=tag), + c.tag(tag, message=tag, ref=ref), c.push(tag), ] - assert f'Create and push this tag: {tag}?' in result.output + expected_prompt = f'Create and push this tag: {tag}?' + assert expected_prompt in result.output -def test_wrong_branch(ddev, basic_git): +def test_wrong_branch_no_release_aborts(ddev, basic_git): """ - Given a branch that doesn't match the release branch pattern we should abort. + With no --release and not on a release branch, the command aborts and asks for --release. """ name = 'foo' basic_git.current_branch.return_value = name @@ -123,15 +138,65 @@ def test_wrong_branch(ddev, basic_git): result = ddev('release', 'branch', 'tag') assert result.exit_code == 1, result.output - assert rf'Invalid branch name: {name}. Branch name must match the pattern ^\d+\.\d+\.x$' in result.output - assert basic_git.method_calls == [c.current_branch()] + assert 'is not a release branch' in result.output + assert '--release' in result.output + + +def test_invalid_release_value_aborts(ddev, git): + result = ddev('release', 'branch', 'tag', '--release', 'not-a-release') + assert result.exit_code != 0, result.output + assert 'Invalid `--release` value' in result.output + + +@pytest.mark.parametrize( + 'release_input', + [ + pytest.param('7.56', id='major.minor'), + pytest.param('7.56.x', id='major.minor.x'), + ], +) +def test_release_input_normalized(ddev, git, release_input): + """ + `--release` accepts both `7.56` and `7.56.x` and normalizes to `7.56.x`. + """ + result = ddev('release', 'branch', 'tag', '--release', release_input, '--final', input='y\n') + + _assert_tag_pushed(git, result, '7.56.0') + + +def test_release_branch_not_on_origin_aborts(ddev, git): + """ + If the release branch is missing from origin, the command aborts. + """ + git.capture.side_effect = lambda *args: '' + result = ddev('release', 'branch', 'tag', '--release', '7.99.x', '--final', input='y\n') + + assert result.exit_code == 1, result.output + assert 'does not exist on `origin`' in result.output + + +def test_confirm_release_branch_when_no_release_arg(ddev, git): + """ + With no --release and already on a release branch, we ask to confirm before tagging. + """ + result = ddev('release', 'branch', 'tag', '--final', input='y\ny\n') + + _assert_tag_pushed(git, result, '7.56.0') + assert TAG_THIS_RELEASE_PROMPT in result.output + + +def test_decline_release_branch_when_no_release_arg(ddev, git): + result = ddev('release', 'branch', 'tag', '--final', input='n\n') + assert result.exit_code == 1, result.output + assert TAG_THIS_RELEASE_PROMPT in result.output + assert NO_CONFIRMATION_SO_ABORT in result.output def test_middle_of_release_next_rc(ddev, git): """ We're in the middle of a release, some RCs are already done. We want to create the next RC. """ - result = ddev('release', 'branch', 'tag', input='\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input='\ny\n') _assert_tag_pushed(git, result, '7.56.0-rc.12') assert RC_NUMBER_PROMPT.format('12') in result.output @@ -149,16 +214,10 @@ def test_middle_of_release_next_rc(ddev, git): @pytest.mark.parametrize('last_rc', [11, 12]) def test_do_not_confirm_non_sequential_rc(ddev, git, rc_num, no_confirm, last_rc): """ - We're in the middle of a release, some RCs are already done. User wants to create the next RC. - - However the user asks to create an RC that's less than the latest RC number. - This is unusual, so we give a warning and ask user to confirm. If they don't, we abort. - - Important: we are not overwriting an existing RC tag. + Reject the warning when going backwards in RC numbers. """ - git.tags.return_value.append(f'7.56.0-rc.{last_rc}') - result = ddev('release', 'branch', 'tag', input=f'{rc_num}\n{no_confirm}\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num}\n{no_confirm}\n') assert RC_NUMBER_PROMPT.format(str(last_rc + 1)) in result.output assert ( @@ -172,15 +231,7 @@ def test_do_not_confirm_non_sequential_rc(ddev, git, rc_num, no_confirm, last_rc @pytest.mark.parametrize('rc_num', ['3', '10']) def test_confirm_non_sequential_rc(ddev, git, rc_num): - """ - We're in the middle of a release, some RCs are already done. User wants to create the next RC. - - However the user asks to create an RC that's less than the latest RC number. - This is unusual, so we give a warning and ask user to confirm. If they do, we create the tag. - - Important: we are not overwriting an existing RC tag. - """ - result = ddev('release', 'branch', 'tag', input=f'{rc_num}\ny\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num}\ny\ny\n') assert RC_NUMBER_PROMPT.format('12') in result.output assert ( @@ -194,13 +245,9 @@ def test_confirm_non_sequential_rc(ddev, git, rc_num): @pytest.mark.parametrize('rc_num', ['1', '5', '11']) def test_abort_if_rc_tag_exists(ddev, git, rc_num): """ - We're in the middle of a release, some RCs are already done. User wants to create the next RC. - - However the user asks to create an RC for which we already have a tag. This requires special git flags to clobber - the local and remote tags. To keep our logic simple we give up here and leave a hint how the user can proceed. + Refuse to overwrite an existing RC tag. """ - - result = ddev('release', 'branch', 'tag', input=f'{rc_num}\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num}\ny\n') assert result.exit_code == 1, result.output assert RC_NUMBER_PROMPT.format('12') in result.output @@ -208,10 +255,7 @@ def test_abort_if_rc_tag_exists(ddev, git, rc_num): def test_abort_if_tag_less_than_one(ddev, git): - """ - RC numbers less than 1 don't make any sense, so we abort if we get one. - """ - result = ddev('release', 'branch', 'tag', input='0\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input='0\ny\n') assert RC_NUMBER_PROMPT.format('12') in result.output assert result.exit_code == 1, result.output @@ -227,12 +271,8 @@ def test_abort_if_tag_less_than_one(ddev, git): ], ) def test_abort_valid_rc(ddev, git, no_confirm): - """ - The RC is fine but we don't confirm in the end, so we abort. - """ git.tags.return_value = [] - - result = ddev('release', 'branch', 'tag', input=f'\n{no_confirm}\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'\n{no_confirm}\n') assert RC_NUMBER_PROMPT.format('1') in result.output assert result.exit_code == 1, result.output @@ -243,7 +283,6 @@ def test_abort_valid_rc(ddev, git, no_confirm): 'rc_num_input, rc_num', [ pytest.param('', '1', id='implicit sequential'), - pytest.param('', '1', id='explicit sequential'), pytest.param('2', '2', id='explicit non-sequential'), ], ) @@ -251,12 +290,9 @@ def test_abort_valid_rc(ddev, git, no_confirm): def test_first_rc(ddev, git, rc_num_input, rc_num, tags, patch): """ First RC for a new release. - - We support starting with a number other than 1, though that's very unlikely to happen in practice. """ git.tags.return_value = tags - - result = ddev('release', 'branch', 'tag', input=f'{rc_num_input}\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', input=f'{rc_num_input}\ny\n') _assert_tag_pushed(git, result, f'7.56.{patch}-rc.{rc_num}') assert RC_NUMBER_PROMPT.format('1') in result.output @@ -270,19 +306,154 @@ def test_first_rc(ddev, git, rc_num_input, rc_num, tags, patch): ], ) def test_final(ddev, git, latest_final_tag, expected_new_final_tag): - """ - Create final release tag. - """ git.tags.return_value.append(latest_final_tag) - result = ddev('release', 'branch', 'tag', '--final', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') _assert_tag_pushed(git, result, expected_new_final_tag) +def test_rc_with_explicit_value(ddev, git): + """ + `--rc N` pins the RC number without prompting. + """ + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '12', input='y\n') + _assert_tag_pushed(git, result, '7.56.0-rc.12') + assert RC_NUMBER_PROMPT.format('12') not in result.output + + +def test_rc_explicit_value_skips_ahead_warns(ddev, git): + """ + `--rc N` with N > expected_next emits a gap warning but proceeds. + """ + # Last RC is 11 (per EXAMPLE_TAGS), so expected next is 12. Skipping to 15. + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '15', input='y\n') + + _assert_tag_pushed(git, result, '7.56.0-rc.15') + assert 'skips ahead' in result.output + assert 'Missing RC number(s): 12, 13, 14' in result.output + assert '--rc 12 --ref' in result.output + + +def test_rc_explicit_value_no_gap_no_warning(ddev, git): + """ + `--rc N` matching the expected next number does not emit a gap warning. + """ + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '12', input='y\n') + _assert_tag_pushed(git, result, '7.56.0-rc.12') + assert 'skips ahead' not in result.output + + +@pytest.mark.parametrize('rc_arg', ['--rc=banana', '--rc=0']) +def test_rc_invalid_value_aborts(ddev, git, rc_arg): + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', rc_arg) + assert result.exit_code != 0, result.output + assert '`--rc` value must be a positive integer' in result.output + + +def test_final_and_rc_mutually_exclusive(ddev, git): + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--rc', '3') + assert result.exit_code != 0, result.output + assert 'mutually exclusive' in result.output + + +def test_yes_skips_all_confirmations(ddev, git): + """ + `--yes` skips both the "tag this release?" prompt and the final confirm. + Also skips the RC-number prompt by using the suggested value. + """ + result = ddev('release', 'branch', 'tag', '--yes') + _assert_tag_pushed(git, result, '7.56.0-rc.12') + assert RC_NUMBER_PROMPT.format('12') not in result.output + assert 'auto-yes' in result.output + assert 'Using auto-suggested RC number: 12' in result.output + + +def test_yes_with_pinned_rc(ddev, git): + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--rc', '20', '--yes') + _assert_tag_pushed(git, result, '7.56.0-rc.20') + assert 'skips ahead' in result.output + + +def _make_ref_dispatcher(rev_parse=None, is_ancestor=None): + """Build a `capture` side_effect that dispatches by subcommand for --ref tests. + + Delegates to `_capture_dispatch` for any subcommand it doesn't explicitly handle so the + default `ls-remote` payload stays defined in exactly one place. + """ + + def dispatch(*args): + sub = args[0] if args else '' + if sub == 'rev-parse': + if isinstance(rev_parse, BaseException): + raise rev_parse + return rev_parse + if sub == 'merge-base': + if isinstance(is_ancestor, BaseException): + raise is_ancestor + return is_ancestor + return _capture_dispatch(*args) + + return dispatch + + +def test_ref_validates_and_tags_at_commit(ddev, git): + """ + `--ref ` tags that commit instead of the branch tip. + """ + git.capture.side_effect = _make_ref_dispatcher(rev_parse='cafef00d\n', is_ancestor='') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--ref', 'cafef00d', input='y\n') + + assert result.exit_code == 0, result.output + assert 'at cafef00d?' in result.output + assert git.method_calls[-2:] == [ + c.tag('7.56.0', message='7.56.0', ref='cafef00d'), + c.push('7.56.0'), + ] + + +def test_ref_does_not_resolve_aborts(ddev, git): + git.capture.side_effect = _make_ref_dispatcher(rev_parse=OSError('bad ref')) + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--ref', 'nope', input='y\n') + assert result.exit_code == 1, result.output + assert 'does not resolve to a commit' in result.output + + +def test_ref_not_ancestor_aborts(ddev, git): + git.capture.side_effect = _make_ref_dispatcher(rev_parse='badf00d\n', is_ancestor=OSError('not ancestor')) + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--ref', 'badf00d', input='y\n') + assert result.exit_code == 1, result.output + assert 'is not an ancestor of' in result.output + + +def test_no_worktree_subprocess_invoked(ddev, git): + """ + The command must never touch `git worktree`. Previously it created a worktree to check out + `origin/` for tagging; now it operates against the ref directly. Worktree ops can + go through either `run` (add/remove) or `capture` (list), so both are filtered. + """ + git.current_branch.return_value = 'master' + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') + + assert result.exit_code == 0, result.output + worktree_calls = [ + call for call in git.method_calls if call[0] in ('run', 'capture') and call.args and call.args[0] == 'worktree' + ] + assert worktree_calls == [] + + +def test_local_release_branch_not_pulled(ddev, git): + """ + The command must never pull the user's local release branch. Tagging operates against + `origin/` only, so the user's local checkout state is irrelevant. + """ + ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', input='y\n') + git.pull.assert_not_called() + + def test_build_agent_yaml_already_updated_does_not_dispatch_workflow(ddev, git, mocker): dispatch_workflow = mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow') - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='y\n') _assert_tag_pushed(git, result, '7.56.0') dispatch_workflow.assert_not_called() @@ -294,14 +465,14 @@ def test_build_agent_yaml_points_to_main_warns_and_continues(ddev, basic_git, mo mocker.patch('ddev.cli.release.branch.tag._build_agent_yaml_points_to_main', return_value=True) dispatch_workflow = mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow') - result = ddev('release', 'branch', 'tag', '--skip-open-pr-check', input='\ny\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--skip-open-pr-check', input='\ny\n') assert result.exit_code == 0, result.output assert '`.gitlab/build_agent.yaml` still points to `main`' in result.output assert 'Dispatched `update-build-agent-yaml.yml`' in result.output assert 'Tagging will continue.' in result.output dispatch_workflow.assert_called_once_with('update-build-agent-yaml.yml', 'master', {'branch': '7.56.x'}) - basic_git.tag.assert_called_once_with('7.56.0-rc.1', message='7.56.0-rc.1') + basic_git.tag.assert_called_once_with('7.56.0-rc.1', message='7.56.0-rc.1', ref=ORIGIN_REF) basic_git.push.assert_called_once_with('7.56.0-rc.1') @@ -311,7 +482,7 @@ def test_build_agent_yaml_workflow_dispatch_waits_for_tag_confirmation(ddev, bas mocker.patch('ddev.cli.release.branch.tag._build_agent_yaml_points_to_main', return_value=True) dispatch_workflow = mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow') - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='n\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='n\n') assert result.exit_code == 1, result.output assert NO_CONFIRMATION_SO_ABORT in result.output @@ -330,15 +501,12 @@ def test_build_agent_yaml_workflow_dispatch_failure_warns_and_continues(ddev, ba ), ) - result = ddev('release', 'branch', 'tag', '--final', '--skip-open-pr-check', input='y\n') + result = ddev('release', 'branch', 'tag', '--release', '7.56.x', '--final', '--skip-open-pr-check', input='y\n') assert result.exit_code == 0, result.output assert 'Warning: unable to trigger `update-build-agent-yaml.yml`: API error' in result.output assert 'gh workflow run update-build-agent-yaml.yml -f branch=7.56.x' in result.output assert 'Dispatched `update-build-agent-yaml.yml`' not in result.output dispatch_workflow.assert_called_once_with('update-build-agent-yaml.yml', 'master', {'branch': '7.56.x'}) - basic_git.tag.assert_called_once_with('7.56.0', message='7.56.0') + basic_git.tag.assert_called_once_with('7.56.0', message='7.56.0', ref=ORIGIN_REF) basic_git.push.assert_called_once_with('7.56.0') - - -# TODO: test for adding RCs for a bugfix release From 8bdc2dff6ab3f9ade81892388f6c4433c3f963e1 Mon Sep 17 00:00:00 2001 From: Juanpe Araque Date: Fri, 3 Jul 2026 15:26:06 +0200 Subject: [PATCH 6/6] [AI-6827] Add ddev release test-agent command (#23722) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ddev release test-agent command Dispatch both .github/workflows/test-agent.yml and test-agent-windows.yml against a release branch or tag, with Agent image resolution and validation against registry.datadoghq.com. The command resolves the latest published *-rc.N tag for a branch input, validates Linux and Windows (servercore) manifests, shows a confirmation panel, then fires both workflow_dispatch calls in parallel via the async GitHub client. Extends AsyncGitHubClient.create_workflow_dispatch with a typed return_run_details kwarg (overloads) so the new 200 response shape (workflow_run_id, run_url, html_url) is parsed into a WorkflowDispatchResult when requested, and the result panel links directly to each run. * Add changelog entry * Apply review feedback: tighten typing, broaden registry handling, expand tests - Rename _MANIFEST_ACCEPT to MANIFEST_ACCEPT per AGENTS.md (no underscore prefix on module constants). - Move asyncio import to module level; drop duplicate import. - Narrow _extract_run_urls signature to Sequence[GitHubResponse[WorkflowDispatchResult] | BaseException]; remove three type:ignore comments and the unused owner/repo/ref params. - Surface the originating exception (__cause__) in the abort message after a dispatch failure. - Combine messages when both Linux and Windows dispatches fail. - tag.lstrip('v') -> tag.removeprefix('v') for accurate intent. - Document REPO_OWNER design choice with a short comment. - Add one-line docstrings to manifest_url and tags_list_url. - Request a large page (n=10000) when listing registry tags; the Agent registry has many years of tags and the default page may not include the current release cycle. - Type-annotate test fixtures and test functions. - Parametrize test_branch_resolves_latest_rc over workflow_id; add a mirror test for the Linux-fails partial-dispatch case; add a both-fail case; cover 401/403/503 in the manifest error test; cover null and missing 'tags' key in the tags-list parser. * Apply round-2 review feedback + ruff 0.11.10 format Review feedback: - Pass inputs dict through to _print_plan so the plan display is derived from the same source the dispatch sees (no more hardcoded 'true'/'false' drifting from the inputs values). - Use string values ('true', 'false') for type:boolean workflow inputs to match what the GitHub workflow_dispatch API documents. - Replace assert-based type narrowing in _extract_run_urls with nested isinstance checks so the flow stays sound under python -O. - Rename surviving_label -> failing_label in the parametrized partial-dispatch test (the assertion targets the failing side's message, not the surviving). CI fix: - Reformat with ruff 0.11.10 (the version pinned by the ddev CI workflow); my local hatch env shipped 0.15.11 which produced minor multi-line layout differences. Affects test-agent.yml lint for both Linux and Windows matrices. * Harden ddev release test-agent error handling - Abort early when github.token is empty instead of leaking the AsyncGitHubClient ValueError out of asyncio.run. - Abort with a friendly message when the public Agent registry returns a non-404 HTTP error from manifest_exists or list_agent_rc_tags, rather than surfacing the raw httpx traceback. - Narrow the inputs dict to dict[str, str] all the way through _dispatch_both / _dispatch_both_async; the workflow_dispatch API rejects non-string values, so an accidental non-string value now becomes a type error at the closest boundary to the API call. - Drop the cause-formatting suffix on the dispatch-failure abort; the RuntimeError messages already embed the underlying error so the cause would render twice. - Drop the unused ref parameter from _print_result. - Add tests covering timeout and connection-error propagation from the registry helpers. * Hardcode dispatch target to DataDog/integrations-core - Introduce REPO_NAME='integrations-core' constant alongside REPO_OWNER and use it in the workflow_dispatch call, so a user with ddev pointed at integrations-extras/marketplace/a fork doesn't silently dispatch to DataDog/. Both test-agent.yml workflows only exist on integrations-core, matching the existing owner hardcoding rationale. - Lift the duplicate inline 'import httpx' to a top-level import so the module's external dependencies are visible at a glance. - Add tests for the empty-token guard and for the two registry HTTP error paths that translate to friendly app.abort messages. * Extract docker_registry utility and tighten test-agent workflow checks - Move generic Docker Registry v2 helpers (manifest probe, tag listing with Link: rel=next pagination) into ddev.utils.docker_registry so they can be reused by other release tooling. The agent-specific RC filter stays in cli.release.test_agent.registry as a thin wrapper. - Read workflow files from origin/ so a branch the user has not yet fetched no longer reads as a missing workflow file. Distinguish "file not in tree" from "ref not in local clone" from other git failures so the abort message points at the real problem. - Move httpx and asyncio imports back inside the functions that use them; the registry module is lazy-imported, so the top-level imports were paid on every ddev invocation (including ddev --help). - Use add_note to keep the Windows traceback attached when both dispatches fail. Wrap the result print in try/except/else so the success-only call site is obvious. - Take versions (not full image refs) into _validate_images_exist; drop the rsplit round-trip. - Note in the inputs dict that test-py2='false' on Windows is intentional. * Lift asyncio import to module top * Surface both errors when test-agent dispatches both fail - Fold the Linux and Windows error reprs into the RuntimeError message so app.abort(str(e)) renders both. The previous add_note(...) approach stored the Windows error in __notes__, which str(exc) does not include, so the Windows side was silently dropped from the abort output. - Re-raise CancelledError and other non-Exception BaseException subclasses from _extract_run_urls before treating results as dispatch failures. asyncio.gather(return_exceptions=True) captures cancellation into the result list; wrapping it in RuntimeError would hide flow-control intent. - Pass the github token directly into _dispatch_both instead of threading the whole Application object through. Decouples the helper. - Centralize the httpx.HTTPError -> app.abort translation in a _registry_errors context manager; removes the duplicated try/except in _resolve_version and _validate_images_exist and keeps the lazy httpx import in one place. - Render the dispatch plan via display_info so it lands on stderr alongside the surrounding progress lines; piping the command no longer splits the pre-dispatch narrative across stdout and stderr. - Replace the hand-rolled Link-header parser in docker_registry.list_tags with httpx.Response.links, which handles RFC 5988 quoting and multi-link rels correctly. - Strengthen test_both_dispatches_fail_combine_messages to assert both Windows: and a count of 2 of the shared error repr so the regression cannot recur. * Split test-agent helpers into validation/images/dispatch modules Move the supporting logic for `ddev release test-agent` into sibling modules so the command file reads as the orchestration story it tells: - validation.py — input regex checks, git ref existence on origin, and workflow-file presence on the resolved ref (including the file-missing vs ref-not-fetched vs unknown-git-failure dispatch in the error path). - images.py — RC version resolution, image ref construction, manifest existence checks, and the registry_errors context manager that translates httpx errors into clean abort messages. - dispatch.py — the parallel workflow_dispatch orchestration. The async coroutine is nested inside dispatch_both so the reader sees the full flow in one function rather than bouncing between two near-empty stack frames. extract_run_urls keeps the partial/total-failure surface next to the dispatch. `__init__.py` now contains only the Click command plus the small `_print_plan`/`_print_result` display helpers. Every sibling module is imported lazily inside the command body so `ddev --help` only pays for `click` from this package. Also narrow `AsyncGitHubClient.create_workflow_dispatch`'s `inputs` parameter from `dict[str, Any] | None` to `dict[str, str] | None` across both `@overload`s and the implementation. The workflow_dispatch API contract is string-to-string (booleans are matched against the lowercase string form), every in-tree caller already passes a `dict[str, str]`, and the wider type silently accepted values that would surface as runtime 422s from GitHub. The fake test client mirror is updated to match. * Auto-fetch the target ref and model branch/tag as a sum type Make the user's `--branch` or `--tag` choice a typed `Branch | Tag` produced by `validate_input`, and have every downstream helper take that `ReleaseTarget` instead of `(branch: str | None, tag: str | None)`. This puts the "exactly one is set" invariant in the type system and removes the type-narrowing `assert`s that would otherwise turn into an `AssertionError` with no context if anyone broke the invariant. While at it, drop the `git ls-remote` probe + "please run `git fetch` and try again" hint and fetch the ref ourselves. The new `fetch_target` runs `git fetch --quiet --depth=1 origin refs/heads/:refs/remotes/origin/` (or the equivalent `refs/tags/...:refs/tags/...` for `--tag`), which both confirms the ref exists on origin and populates the local refs we need to read the workflow files. If `git fetch` reports `couldn't find remote ref`, the abort message stays the same as before (`Branch X not found on origin`); other git errors surface verbatim through `Failed to fetch ... from origin: ...`. Tests now mock `GitRepository.run` (the fetch) instead of `GitRepository.capture` (the ls-remote). Two new tests pin the exact refspec the command must send so a future refactor that breaks the fetch shape can't slip through. The "please-fetch-first" test goes away; that branch is unreachable now. * Define workflow names in validation; symmetric error shape; cover CancelledError - Move WORKFLOW_LINUX/WORKFLOW_WINDOWS from dispatch.py to validation.py and have dispatch.py import them from validation. Inverts the previous arrow so the layer that runs first owns the constants; validation no longer pulls in dispatch's async/HTTP modules at import time. - Make fetch_target's OSError handler use `if/else` for symmetry with verify_workflows_present_on_ref. Behavior unchanged; the two helpers now read identically instead of relying on `app.abort`'s `NoReturn` to keep the trailing abort unreachable. - Add direct unit tests for extract_run_urls. The existing test_command tests only feed httpx.HTTPStatusError (an Exception), so the BaseException-but-not-Exception re-raise that lets CancelledError / KeyboardInterrupt propagate was untested in CI. New tests pin both the flow-control propagation contract and the both-failure message shape. * Render dispatch result as a panel and space out the command's output blocks - Add blank-line separators between each phase of the command (fetch, version resolution, image validation, dispatch plan, final result) so the output reads as distinct sections rather than one continuous stream. - Replace the trailing `display_success` + two `display_pair` calls with a rich Panel matching the look of `ddev release port-commit`'s completion summary. The two run URLs sit inside a cyan-bordered "Workflows dispatched" panel with bold-aligned Linux/Windows labels. * Allow forced test-agent branch fetch * Add test-agent workflow monitoring * Fix mypy errors in test-agent module - dispatch.py: declare DispatchOutcome as a PEP 695 type alias so mypy recognises it as a type (the previous TYPE_CHECKING-guarded assignment read as a variable on mypy 2.1). - validation.py: validate both branch/tag invariants up front and assert the remaining one is set, removing the trailing app.abort that mypy refused to credit as a function terminator. * Match all patch RCs when resolving --branch in test-agent * Tighten test-agent types and harden error paths - Move REPO_OWNER/REPO_NAME from dispatch.py to validation.py so monitoring no longer pulls dispatch's async/HTTP module just for two strings. - Narrow monitor_workflows / monitor_dispatched_workflows first parameter from Application to Terminal — matches what the body actually uses and what the tests already pass. - Tighten extract_dispatched_workflows input to tuple[DispatchOutcome, DispatchOutcome] so the 2-result contract is explicit at the type level. - Widen the monitor_dispatched_workflows guard from RuntimeError to Exception so httpx network failures abort cleanly with the standard "Failed to monitor workflows" message. - Guard list_tags against non-JSON 2xx responses so registry_errors translates them into a clean abort instead of a raw ValueError. * Make test-agent monitor resilient and quieter - Tolerate transient httpx errors during polling: keep the prior monitor state instead of aborting the entire monitor on one bad request. - Emit a single final panel in both interactive and non-interactive paths so CI logs no longer get flooded with per-poll panels. - Move DispatchedWorkflow under TYPE_CHECKING in monitoring.py so importing monitoring does not eagerly load dispatch and its async machinery. - Surface the dispatched-run html_urls in the monitor-failed abort message so the user can resume monitoring manually. - Tighten test_dispatch fixtures from list[...] to tuple[...] to match the production signature. - Annotate the FakeAsyncGitHubClient.list_workflow_run_jobs stub with AsyncIterator[GitHubResponse[Any]] for consistency. * Format test_github_async.py with ruff --------- Co-authored-by: Alexey Pilyugin --- ddev/changelog.d/23722.added | 1 + .../src/ddev/cli/ci/tests/task_test_runner.py | 7 +- ddev/src/ddev/cli/release/__init__.py | 2 + .../ddev/cli/release/port_commit_workflow.py | 23 +- .../ddev/cli/release/test_agent/__init__.py | 170 ++++++++ .../ddev/cli/release/test_agent/dispatch.py | 104 +++++ .../src/ddev/cli/release/test_agent/images.py | 72 ++++ .../ddev/cli/release/test_agent/monitoring.py | 299 ++++++++++++++ .../ddev/cli/release/test_agent/registry.py | 40 ++ .../ddev/cli/release/test_agent/validation.py | 154 ++++++++ ddev/src/ddev/cli/terminal.py | 36 ++ ddev/src/ddev/utils/docker_registry.py | 83 ++++ ddev/src/ddev/utils/github_async/client.py | 47 ++- .../utils/github_async/models/workflow.py | 4 +- .../cli/ci/tests/test_task_test_runner.py | 1 + ddev/tests/cli/release/test_agent/__init__.py | 3 + .../cli/release/test_agent/test_command.py | 373 ++++++++++++++++++ .../cli/release/test_agent/test_dispatch.py | 94 +++++ .../cli/release/test_agent/test_monitoring.py | 312 +++++++++++++++ .../cli/release/test_agent/test_registry.py | 90 +++++ ddev/tests/cli/test_terminal.py | 42 ++ ddev/tests/helpers/github_async.py | 15 +- ddev/tests/utils/test_docker_registry.py | 169 ++++++++ ddev/tests/utils/test_github_async.py | 120 +++++- 24 files changed, 2233 insertions(+), 28 deletions(-) create mode 100644 ddev/changelog.d/23722.added create mode 100644 ddev/src/ddev/cli/release/test_agent/__init__.py create mode 100644 ddev/src/ddev/cli/release/test_agent/dispatch.py create mode 100644 ddev/src/ddev/cli/release/test_agent/images.py create mode 100644 ddev/src/ddev/cli/release/test_agent/monitoring.py create mode 100644 ddev/src/ddev/cli/release/test_agent/registry.py create mode 100644 ddev/src/ddev/cli/release/test_agent/validation.py create mode 100644 ddev/src/ddev/utils/docker_registry.py create mode 100644 ddev/tests/cli/release/test_agent/__init__.py create mode 100644 ddev/tests/cli/release/test_agent/test_command.py create mode 100644 ddev/tests/cli/release/test_agent/test_dispatch.py create mode 100644 ddev/tests/cli/release/test_agent/test_monitoring.py create mode 100644 ddev/tests/cli/release/test_agent/test_registry.py create mode 100644 ddev/tests/cli/test_terminal.py create mode 100644 ddev/tests/utils/test_docker_registry.py diff --git a/ddev/changelog.d/23722.added b/ddev/changelog.d/23722.added new file mode 100644 index 0000000000000..4d0ae91f2b214 --- /dev/null +++ b/ddev/changelog.d/23722.added @@ -0,0 +1 @@ +Add `ddev release test-agent` command that dispatches the Linux and Windows Agent test workflows against a release branch or tag. diff --git a/ddev/src/ddev/cli/ci/tests/task_test_runner.py b/ddev/src/ddev/cli/ci/tests/task_test_runner.py index 6b02821e9fd81..30dbddf3433ab 100644 --- a/ddev/src/ddev/cli/ci/tests/task_test_runner.py +++ b/ddev/src/ddev/cli/ci/tests/task_test_runner.py @@ -63,7 +63,12 @@ async def process_message(self, message: TestBatch) -> None: log_extra: dict[str, Any] = {"batch_id": message.id} dispatch = await self._client.create_workflow_dispatch( - self._options.owner, self._options.repo, self._options.workflow_id, ref=self._options.ref, inputs=inputs + self._options.owner, + self._options.repo, + self._options.workflow_id, + ref=self._options.ref, + inputs=inputs, + return_run_details=True, ) run_id = dispatch.data.workflow_run_id log_extra["run_id"] = run_id diff --git a/ddev/src/ddev/cli/release/__init__.py b/ddev/src/ddev/cli/release/__init__.py index 22d96ab906fd6..e6b136596e87f 100644 --- a/ddev/src/ddev/cli/release/__init__.py +++ b/ddev/src/ddev/cli/release/__init__.py @@ -14,6 +14,7 @@ from ddev.cli.release.port_commit import port_commit from ddev.cli.release.show import show from ddev.cli.release.stats import stats +from ddev.cli.release.test_agent import test_agent @click.group(short_help='Manage the release of integrations') @@ -33,4 +34,5 @@ def release(): release.add_command(show) release.add_command(stats) release.add_command(tag) +release.add_command(test_agent) release.add_command(upload) diff --git a/ddev/src/ddev/cli/release/port_commit_workflow.py b/ddev/src/ddev/cli/release/port_commit_workflow.py index 472f1692ecc17..b64ac145e7f49 100644 --- a/ddev/src/ddev/cli/release/port_commit_workflow.py +++ b/ddev/src/ddev/cli/release/port_commit_workflow.py @@ -695,7 +695,7 @@ def resolve_port_plan( dry_run=dry_run, ) - app.output(_format_plan_summary(plan), stderr=True) + app.output(_format_plan_summary(app, plan), stderr=True) if not dry_run and not click.confirm('Continue?'): app.abort('Did not get confirmation, aborting.') @@ -705,7 +705,6 @@ def resolve_port_plan( def display_completion_summary(app: Application, plan: PortPlan, *, pr_url: str | None) -> None: """Print a panel summarising the port outcome.""" - text = Text() rows: list[tuple[str, str]] = [ ('Commit', f'{plan.full_sha[:10]} - {plan.clean_subject}'), ('Target', plan.target_branch), @@ -714,17 +713,13 @@ def display_completion_summary(app: Application, plan: PortPlan, *, pr_url: str if pr_url is not None: rows.append(('Pull request', pr_url)) - label_width = max(len(label) for label, _ in rows) - for i, (label, value) in enumerate(rows): - if i: - text.append('\n') - text.append(f'{label}:'.ljust(label_width + 2), style='bold') - text.append(value) - - app.output(Panel(text, title='Backport completed', title_align='left', border_style='cyan'), stderr=True) + app.output( + Panel(app.labeled_lines(rows), title='Backport completed', title_align='left', border_style='cyan'), + stderr=True, + ) -def _format_plan_summary(plan: PortPlan) -> Text: +def _format_plan_summary(app: Application, plan: PortPlan) -> Text: text = Text() text.append('Configuration:', style='bold') @@ -739,10 +734,8 @@ def _format_plan_summary(plan: PortPlan) -> Text: ('Verify commit', str(plan.verify)), ('Dry run', str(plan.dry_run)), ] - for label, value in rows: - text.append('\n ') - text.append(f'{label}:', style='bold') - text.append(f' {value}') + text.append('\n') + text.append_text(app.labeled_lines(rows, indent=' ', align=False)) return text diff --git a/ddev/src/ddev/cli/release/test_agent/__init__.py b/ddev/src/ddev/cli/release/test_agent/__init__.py new file mode 100644 index 0000000000000..540b2859cd45b --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/__init__.py @@ -0,0 +1,170 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""`ddev release test-agent` — dispatch the Linux + Windows Agent test workflows. + +The orchestration body lives here so the file reads top-to-bottom as the command's story. +Each step delegates to a sibling module (`validation`, `images`, `dispatch`), and every +helper module is imported lazily inside the function body so `ddev --help` only pays for +`click` from this package. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import click + +if TYPE_CHECKING: + from collections.abc import Sequence + + from ddev.cli.application import Application + from ddev.cli.release.test_agent.dispatch import DispatchedWorkflow + + +@click.command('test-agent', short_help='Dispatch the Agent test workflows against a branch or tag') +@click.option('--branch', help='Release branch to test, e.g. `7.80.x`.') +@click.option('--tag', help='Agent release tag to test, e.g. `7.80.0-rc.1` or `7.80.0`.') +@click.option('--dry-run', is_flag=True, help='Resolve images and print the plan without dispatching.') +@click.option('--monitor', is_flag=True, help='Monitor dispatched workflow jobs until both workflows finish.') +@click.option( + '--poll-interval', + type=click.FloatRange(min=1.0), + default=10.0, + show_default=True, + help='Seconds between workflow monitor polls.', +) +@click.option('--yes', '-y', is_flag=True, help='Skip the interactive confirmation prompt.') +@click.pass_obj +def test_agent( + app: Application, + branch: str | None, + tag: str | None, + dry_run: bool, + monitor: bool, + poll_interval: float, + yes: bool, +) -> None: + """Trigger `test-agent.yml` and `test-agent-windows.yml` against the resolved Agent image. + + Exactly one of `--branch` or `--tag` must be provided. When `--branch` is given, the latest + `MAJ.MIN.PATCH-rc.N` published to `registry.datadoghq.com/agent` is used as the Agent image. + When `--tag` is given, that exact tag is used. Linux and Windows (servercore) variants are + both validated against the registry before either workflow is dispatched. + """ + import logging + + from ddev.cli.release.test_agent.dispatch import dispatch_both + from ddev.cli.release.test_agent.images import build_image_refs, resolve_version, validate_images_exist + from ddev.cli.release.test_agent.validation import ( + Branch, + fetch_target, + validate_input, + verify_workflows_present_on_ref, + ) + + logging.getLogger('httpx').setLevel(logging.WARNING) + + target = validate_input(app, branch, tag) + + if not app.config.github.token: + app.abort('GitHub token required. Set `github.token` via `ddev config set github.token `.') + + fetch_target(app, target) + verify_workflows_present_on_ref(app, target) + app.display_info('') + + version = resolve_version(app, target) + app.display_info('') + + validate_images_exist(app, version) + linux_image, windows_image = build_image_refs(version) + app.display_info('') + + # GitHub's workflow_dispatch API expects every value in `inputs` to be a string, even for + # `type: boolean` workflow inputs — booleans are parsed from the lowercase string form. + # `test-py2='false'` is sent to both dispatches by design: this command is forward-looking + # and tests Python 3 only, even on Windows (where `test-agent-windows.yml` defaults + # `test-py2` to `true` for legacy reasons). + inputs: dict[str, str] = { + 'test-py3': 'true', + 'test-py2': 'false', + 'agent-image': linux_image, + 'agent-image-windows': windows_image, + } + is_branch = isinstance(target, Branch) + _print_plan(app, ref=target.name, version=version, is_branch=is_branch, inputs=inputs) + app.display_info('') + + if dry_run: + app.display_info('Dry run — no workflows dispatched.') + return + + if not yes and not click.confirm('Dispatch both workflows?', default=False): + app.abort('Aborted by user.') + + try: + workflows = dispatch_both(app.config.github.token, ref=target.name, inputs=inputs) + except RuntimeError as e: + app.abort(str(e)) + else: + if not monitor: + app.display_info('') + _print_result(app, workflows=workflows) + + if monitor: + from ddev.cli.release.test_agent.monitoring import monitor_dispatched_workflows + + try: + monitor_dispatched_workflows( + app, + app.config.github.token, + ref=target.name, + workflows=workflows, + poll_interval=poll_interval, + ) + except Exception as e: + urls = ', '.join(w.html_url for w in workflows) + app.abort(f'Failed to monitor workflows: {e}. Runs are still in flight: {urls}') + + +def _print_plan( + app: Application, + *, + ref: str, + version: str, + is_branch: bool, + inputs: dict[str, str], +) -> None: + """Render the resolved dispatch plan via the stderr-bound `display_info` channel. + + All progress lines (`display_waiting`/`display_success`) default to stderr; keeping the + plan on the same channel means piping the command into a file leaves stdout clean and + keeps the whole pre-dispatch narrative coherent on stderr. + """ + from rich.panel import Panel + + from ddev.cli.release.test_agent.validation import WORKFLOW_LINUX, WORKFLOW_WINDOWS + + rows: list[tuple[str, str]] = [ + ('Workflows', f'{WORKFLOW_LINUX}, {WORKFLOW_WINDOWS}'), + ('Ref', ref), + ] + if is_branch: + rows.append(('Resolved RC', version)) + rows.extend(inputs.items()) + + app.output( + Panel(app.labeled_lines(rows), title='Dispatch plan', title_align='left', border_style='cyan'), stderr=True + ) + + +def _print_result(app: Application, *, workflows: Sequence[DispatchedWorkflow]) -> None: + """Render the two run URLs in a rich Panel, matching the look of `ddev release port-commit`.""" + from rich.panel import Panel + + rows = [(workflow.label, workflow.html_url) for workflow in workflows] + app.output( + Panel(app.labeled_lines(rows), title='Workflows dispatched', title_align='left', border_style='cyan'), + stderr=True, + ) diff --git a/ddev/src/ddev/cli/release/test_agent/dispatch.py b/ddev/src/ddev/cli/release/test_agent/dispatch.py new file mode 100644 index 0000000000000..3c23a343af12f --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/dispatch.py @@ -0,0 +1,104 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Parallel `workflow_dispatch` orchestration for `ddev release test-agent`.""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from ddev.cli.release.test_agent.validation import REPO_NAME, REPO_OWNER, WORKFLOW_LINUX, WORKFLOW_WINDOWS + +if TYPE_CHECKING: + from ddev.utils.github_async import GitHubResponse + from ddev.utils.github_async.models import WorkflowDispatchResult + +type DispatchOutcome = GitHubResponse[WorkflowDispatchResult] | BaseException + + +@dataclass(frozen=True) +class DispatchedWorkflow: + """A workflow run created by `ddev release test-agent`.""" + + label: str + workflow_id: str + run_id: int + html_url: str + + +def dispatch_both(token: str, *, ref: str, inputs: dict[str, str]) -> tuple[DispatchedWorkflow, DispatchedWorkflow]: + """Dispatch both workflows in parallel via the async GitHub client.""" + from ddev.utils.github_async import async_github_client + + async def run_dispatches() -> tuple[DispatchOutcome, DispatchOutcome]: + async with async_github_client(token=token) as client: + return await asyncio.gather( + client.create_workflow_dispatch( + owner=REPO_OWNER, + repo=REPO_NAME, + workflow_id=WORKFLOW_LINUX, + ref=ref, + inputs=inputs, + return_run_details=True, + ), + client.create_workflow_dispatch( + owner=REPO_OWNER, + repo=REPO_NAME, + workflow_id=WORKFLOW_WINDOWS, + ref=ref, + inputs=inputs, + return_run_details=True, + ), + return_exceptions=True, + ) + + return extract_dispatched_workflows(asyncio.run(run_dispatches())) + + +def extract_dispatched_workflows( + results: tuple[DispatchOutcome, DispatchOutcome], +) -> tuple[DispatchedWorkflow, DispatchedWorkflow]: + """Pull workflow runs out of two gather results, raising on any exception with a partial-success hint. + + `asyncio.gather(return_exceptions=True)` captures `CancelledError`/`KeyboardInterrupt` + (`BaseException` subclasses, not `Exception`) into its result list. Re-raise those first + so flow-control exceptions propagate cleanly instead of being wrapped in `RuntimeError`. + """ + linux_result, windows_result = results + + for result in (linux_result, windows_result): + if isinstance(result, BaseException) and not isinstance(result, Exception): + raise result + + if isinstance(linux_result, BaseException): + if isinstance(windows_result, BaseException): + raise RuntimeError( + f'Both dispatches failed. Linux: {linux_result!r}. Windows: {windows_result!r}.' + ) from linux_result + sibling = windows_result.data.html_url + raise RuntimeError( + f'Linux dispatch failed: {linux_result}. The other workflow was dispatched at {sibling}.' + ) from linux_result + + if isinstance(windows_result, BaseException): + sibling = linux_result.data.html_url + raise RuntimeError( + f'Windows dispatch failed: {windows_result}. The other workflow was dispatched at {sibling}.' + ) from windows_result + + return ( + DispatchedWorkflow( + label='Linux', + workflow_id=WORKFLOW_LINUX, + run_id=linux_result.data.workflow_run_id, + html_url=linux_result.data.html_url, + ), + DispatchedWorkflow( + label='Windows', + workflow_id=WORKFLOW_WINDOWS, + run_id=windows_result.data.workflow_run_id, + html_url=windows_result.data.html_url, + ), + ) diff --git a/ddev/src/ddev/cli/release/test_agent/images.py b/ddev/src/ddev/cli/release/test_agent/images.py new file mode 100644 index 0000000000000..7518b4e43fe19 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/images.py @@ -0,0 +1,72 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Resolve and validate the Agent image refs that `ddev release test-agent` dispatches against.""" + +from __future__ import annotations + +import contextlib +from typing import TYPE_CHECKING + +import httpx + +from ddev.cli.release.test_agent import registry +from ddev.cli.release.test_agent.validation import Branch, ReleaseTarget + +if TYPE_CHECKING: + from collections.abc import Iterator + + from ddev.cli.application import Application + + +@contextlib.contextmanager +def registry_errors(app: Application, scope: str) -> Iterator[None]: + """Translate any `httpx.HTTPError` raised inside the block into a clean `app.abort` message. + + `scope` is interpolated into the abort text — e.g. `'tags'` for the tag listing or an + image ref like `'registry.datadoghq.com/agent:7.80.0-rc.3'` for a manifest probe. + """ + try: + yield + except httpx.HTTPError as e: + app.abort(f'Failed to query registry.datadoghq.com for {scope}: {e}') + + +def resolve_version(app: Application, target: ReleaseTarget) -> str: + """Pick the Agent image tag to test: the explicit tag, or the highest published RC for a branch.""" + if not isinstance(target, Branch): + return target.name + + major_str, minor_str, _ = target.name.split('.') + major, minor = int(major_str), int(minor_str) + + app.display_waiting(f'Looking up latest {major}.{minor}.*-rc.* in registry.datadoghq.com...') + with registry_errors(app, 'tags'): + tags = registry.list_agent_rc_tags(major, minor) + if not tags: + app.abort( + f'No `{major}.{minor}.*-rc.*` tags found in registry.datadoghq.com/agent. ' + 'Pass --tag explicitly once the first RC is published.' + ) + latest = tags[-1] + app.display_success(f'Latest RC: {latest}') + return latest + + +def build_image_refs(version: str) -> tuple[str, str]: + """Return the (linux, windows) image refs for the given Agent version.""" + base = f'registry.datadoghq.com/agent:{version}' + return base, f'{base}-servercore' + + +def validate_images_exist(app: Application, version: str) -> None: + """Check that both the Linux (``) and Windows (`-servercore`) manifests are published.""" + for tag in (version, f'{version}-servercore'): + image = f'registry.datadoghq.com/agent:{tag}' + app.display_waiting(f'Checking `{image}`...') + with registry_errors(app, f'`{image}`'): + exists = registry.manifest_exists(tag) + if not exists: + app.abort( + f'Image `{image}` not found in registry.datadoghq.com. Confirm the Agent release has been published.' + ) diff --git a/ddev/src/ddev/cli/release/test_agent/monitoring.py b/ddev/src/ddev/cli/release/test_agent/monitoring.py new file mode 100644 index 0000000000000..1677e646a0aa7 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/monitoring.py @@ -0,0 +1,299 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Monitor dispatched Agent test workflow runs.""" + +from __future__ import annotations + +import asyncio +from collections.abc import Awaitable, Callable, Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import httpx +from rich.console import Group +from rich.live import Live +from rich.panel import Panel +from rich.spinner import Spinner +from rich.text import Text +from rich.tree import Tree + +from ddev.cli.release.test_agent.validation import REPO_NAME, REPO_OWNER +from ddev.utils.github_async import async_github_client + +if TYPE_CHECKING: + from ddev.cli.release.test_agent.dispatch import DispatchedWorkflow + from ddev.cli.terminal import Terminal + from ddev.utils.github_async.client import AsyncGitHubClient + +SUCCESS_MARK = '\u2713' +ERROR_MARK = '\u2717' +RUNNING_SPINNER = 'dots' + + +@dataclass(frozen=True) +class JobState: + """Current state for one GitHub Actions job.""" + + name: str + status: str + conclusion: str | None + html_url: str | None + + @property + def is_failure(self) -> bool: + return self.status == 'completed' and self.conclusion not in {'success', 'skipped'} + + +@dataclass(frozen=True) +class WorkflowState: + """Current state for one GitHub Actions workflow run.""" + + label: str + run_id: int + status: str + conclusion: str | None + html_url: str + jobs: list[JobState] + + @property + def is_complete(self) -> bool: + return self.status == 'completed' + + @property + def failed_jobs(self) -> list[JobState]: + return [job for job in self.jobs if job.is_failure] + + +@dataclass(frozen=True) +class MonitorState: + """Current state for all monitored workflow runs.""" + + workflows: list[WorkflowState] + + @property + def is_complete(self) -> bool: + return all(workflow.is_complete for workflow in self.workflows) + + +@dataclass(frozen=True) +class JobCounts: + """Aggregate job counts for one workflow.""" + + waiting: int = 0 + running: int = 0 + succeeded: int = 0 + failed: int = 0 + skipped: int = 0 + + +async def collect_monitor_state( + client: AsyncGitHubClient, + workflows: Sequence[DispatchedWorkflow], +) -> MonitorState: + """Fetch current workflow and job state for every dispatched workflow.""" + states = await asyncio.gather(*(collect_workflow_state(client, workflow) for workflow in workflows)) + return MonitorState(list(states)) + + +async def collect_workflow_state(client: AsyncGitHubClient, workflow: DispatchedWorkflow) -> WorkflowState: + """Fetch current workflow and job state for one dispatched workflow.""" + run_response = await client.get_workflow_run(REPO_OWNER, REPO_NAME, workflow.run_id) + run = run_response.data + jobs: list[JobState] = [] + + async for page in client.list_workflow_jobs(REPO_OWNER, REPO_NAME, workflow.run_id, per_page=100): + jobs.extend( + JobState( + name=job.name, + status=job.status, + conclusion=job.conclusion, + html_url=job.html_url, + ) + for job in page.data.jobs + ) + + return WorkflowState( + label=workflow.label, + run_id=workflow.run_id, + status=run.status, + conclusion=run.conclusion, + html_url=run.html_url or workflow.html_url, + jobs=jobs, + ) + + +def monitor_dispatched_workflows( + terminal: Terminal, + token: str, + *, + ref: str, + workflows: Sequence[DispatchedWorkflow], + poll_interval: float = 10.0, +) -> None: + """Monitor dispatched workflows until GitHub marks every run completed.""" + + async def run() -> None: + async with async_github_client(token=token) as client: + await monitor_workflows(terminal, client, ref=ref, workflows=workflows, poll_interval=poll_interval) + + asyncio.run(run()) + + +async def monitor_workflows( + terminal: Terminal, + client: AsyncGitHubClient, + *, + ref: str, + workflows: Sequence[DispatchedWorkflow], + poll_interval: float = 10.0, + sleep: Callable[[float], Awaitable[None]] = asyncio.sleep, +) -> None: + """Render a live workflow monitor until every workflow run completes.""" + state = MonitorState( + [ + WorkflowState(workflow.label, workflow.run_id, 'queued', None, workflow.html_url, []) + for workflow in workflows + ] + ) + + async def poll() -> MonitorState: + nonlocal state + try: + state = await collect_monitor_state(client, workflows) + except httpx.HTTPError: + pass + return state + + if terminal.interactive and terminal.console.is_terminal: + original_stderr = terminal.console.stderr + terminal.console.stderr = True + try: + with Live( + render_monitor_panel(terminal, ref=ref, poll_interval=poll_interval, state=state), + console=terminal.console, + auto_refresh=True, + refresh_per_second=10, + redirect_stderr=False, + redirect_stdout=False, + transient=True, + vertical_overflow='crop', + ) as live: + while True: + state = await poll() + live.update( + render_monitor_panel(terminal, ref=ref, poll_interval=poll_interval, state=state), + refresh=True, + ) + if state.is_complete: + break + await sleep(poll_interval) + finally: + terminal.console.stderr = original_stderr + else: + while True: + state = await poll() + if state.is_complete: + break + await sleep(poll_interval) + + terminal.output(render_monitor_panel(terminal, ref=ref, poll_interval=poll_interval, state=state), stderr=True) + + +def render_monitor_panel( + terminal: Terminal, + *, + ref: str, + poll_interval: float, + state: MonitorState, +) -> Panel: + """Render workflow and job state as a rich panel.""" + header = terminal.labeled_lines( + [ + ('Ref', ref), + ('Polling', f'every {poll_interval:g}s'), + ] + ) + tree = Tree('Workflows', style='bold') + for workflow in state.workflows: + workflow_branch = tree.add(_workflow_label(workflow)) + if workflow.jobs: + workflow_branch.add(_job_summary_label(workflow.jobs)) + if failed_jobs := workflow.failed_jobs: + failures_branch = workflow_branch.add(Text('Failures', style='bold red')) + for job in sorted(failed_jobs, key=lambda failed_job: failed_job.name.lower()): + failures_branch.add(_failed_job_label(job)) + elif workflow.is_complete: + workflow_branch.add(Text('No jobs reported', style='bold yellow')) + else: + workflow_branch.add(Spinner(RUNNING_SPINNER, text='Waiting for jobs', style='bold magenta')) + + return Panel( + Group(header, Text(''), tree), + title='Agent test workflow monitor', + title_align='left', + border_style='cyan', + ) + + +def _workflow_label(workflow: WorkflowState) -> Text | Spinner: + if not workflow.is_complete: + return Spinner(RUNNING_SPINNER, text=f'{workflow.label} {workflow.html_url}', style='bold magenta') + + success = workflow.conclusion == 'success' + mark = SUCCESS_MARK if success else ERROR_MARK + style = 'bold cyan' if success else 'bold red' + label = Text(f'{mark} ', style=style) + label.append(workflow.label, style=f'{style} link {workflow.html_url}') + label.append(' ') + label.append(workflow.html_url, style=f'link {workflow.html_url}') + return label + + +def _job_summary_label(jobs: Sequence[JobState]) -> Text: + counts = _count_jobs(jobs) + label = Text() + label.append('Waiting: ', style='bold') + label.append(str(counts.waiting), style='bold yellow') + label.append(' Running: ', style='bold') + label.append(str(counts.running), style='bold magenta') + label.append(' Succeeded: ', style='bold') + label.append(str(counts.succeeded), style='bold cyan') + label.append(' Failed: ', style='bold') + label.append(str(counts.failed), style='bold red' if counts.failed else 'bold') + label.append(' Skipped: ', style='bold') + label.append(str(counts.skipped), style='dim' if counts.skipped else 'bold') + return label + + +def _failed_job_label(job: JobState) -> Text: + label = Text(f'{ERROR_MARK} ', style='bold red') + if job.html_url is None: + label.append(job.name, style='bold red') + return label + + label.append(job.name, style=f'bold red link {job.html_url}') + label.append(' ') + label.append(job.html_url, style=f'red link {job.html_url}') + return label + + +def _count_jobs(jobs: Sequence[JobState]) -> JobCounts: + waiting = 0 + running = 0 + succeeded = 0 + failed = 0 + skipped = 0 + for job in jobs: + if job.status != 'completed': + if job.status == 'in_progress': + running += 1 + else: + waiting += 1 + elif job.conclusion == 'success': + succeeded += 1 + elif job.conclusion == 'skipped': + skipped += 1 + else: + failed += 1 + return JobCounts(waiting=waiting, running=running, succeeded=succeeded, failed=failed, skipped=skipped) diff --git a/ddev/src/ddev/cli/release/test_agent/registry.py b/ddev/src/ddev/cli/release/test_agent/registry.py new file mode 100644 index 0000000000000..3520e01c9c1b8 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/registry.py @@ -0,0 +1,40 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Agent-specific wrappers around `ddev.utils.docker_registry`. + +The Datadog Agent image is served by `registry.datadoghq.com/agent`. This +module fixes that repository name and exposes the only agent-specific +operation we need today: filtering the published tag list down to the +`MAJ.MIN.PATCH-rc.N` RCs for a given release line. +""" + +from __future__ import annotations + +import re + +from packaging.version import InvalidVersion, Version + +from ddev.utils import docker_registry + +AGENT_REPOSITORY = 'agent' + + +def manifest_exists(tag: str, *, timeout: float = 10.0) -> bool: + """Return True if `registry.datadoghq.com/agent:` resolves to a manifest, False on 404.""" + return docker_registry.manifest_exists(AGENT_REPOSITORY, tag, timeout=timeout) + + +def list_agent_rc_tags(major: int, minor: int, *, timeout: float = 10.0) -> list[str]: + """Return all `..-rc.N` tags published to the Agent registry, sorted ascending by version.""" + pattern = re.compile(rf'^{major}\.{minor}\.\d+-rc\.\d+$') + raw_tags = docker_registry.list_tags(AGENT_REPOSITORY, timeout=timeout) + matches: list[tuple[Version, str]] = [] + for tag in raw_tags: + if not pattern.match(tag): + continue + try: + matches.append((Version(tag), tag)) + except InvalidVersion: + continue + return [tag for _, tag in sorted(matches, key=lambda pair: pair[0])] diff --git a/ddev/src/ddev/cli/release/test_agent/validation.py b/ddev/src/ddev/cli/release/test_agent/validation.py new file mode 100644 index 0000000000000..8412074565a77 --- /dev/null +++ b/ddev/src/ddev/cli/release/test_agent/validation.py @@ -0,0 +1,154 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Input validation, ref fetching, and workflow-file checks for `ddev release test-agent`. + +The user picks either a release branch or a release tag. Downstream code needs to treat +those two paths differently (the fetch refspec, the local ref name for `git show`, the +version-resolution logic), but the type system has to know which one is in hand. Modelling +the choice as a sum type (`Branch | Tag`) — produced by `validate_input` after the +user-facing checks — keeps the branch/tag distinction explicit at every call site and +eliminates the type-narrowing `assert`s that would otherwise be needed. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ddev.cli.application import Application + +BRANCH_PATTERN = r'^\d+\.\d+\.x$' +TAG_PATTERN = r'^\d+\.\d+\.\d+(-rc\.\d+)?$' + +# Hard-coded: the two test workflows only live on DataDog/integrations-core. Forks and other +# integrations repos (extras, marketplace) have nothing to dispatch even if the branch/tag exists, +# so deferring either component to repo metadata would just hide misconfiguration. If we ever +# ship the workflows elsewhere, plumb the target through here. +REPO_OWNER = 'DataDog' +REPO_NAME = 'integrations-core' + +# The two workflow filenames the command targets. Defined here (the layer that runs first) +# rather than in `dispatch.py`, so the import graph flows validation -> dispatch and validation +# does not pull in dispatch's async/HTTP machinery at module-load time. +WORKFLOW_LINUX = 'test-agent.yml' +WORKFLOW_WINDOWS = 'test-agent-windows.yml' + +WORKFLOW_FILES = [ + f'.github/workflows/{WORKFLOW_LINUX}', + f'.github/workflows/{WORKFLOW_WINDOWS}', +] + +# git error fragments that mean "ref exists but file is not in that tree" — i.e. the workflow +# really isn't on this branch/tag, as opposed to git failing for some other reason. +GIT_FILE_MISSING_FRAGMENTS = ( + 'exists on disk', + 'does not exist', + 'no such path', +) + +# git error fragments emitted when `git fetch` cannot resolve the named ref on origin. +GIT_FETCH_MISSING_FRAGMENTS = ( + "couldn't find remote ref", + 'no such ref', +) + + +@dataclass(frozen=True) +class Branch: + """A validated release branch the user selected via `--branch`.""" + + name: str + + +@dataclass(frozen=True) +class Tag: + """A validated release tag the user selected via `--tag` (with any leading `v` stripped).""" + + name: str + + +ReleaseTarget = Branch | Tag + + +def validate_input(app: Application, branch: str | None, tag: str | None) -> ReleaseTarget: + """Normalize and validate `--branch`/`--tag`, returning a typed `Branch` or `Tag`. + + Click can't enforce the mutually-exclusive constraint by itself, so this is the single + point that does. Every downstream helper takes the resulting `ReleaseTarget` instead of + two optional strings, which keeps the "exactly one is set" invariant in the type system + rather than restated as `assert` at every call site. + """ + if branch is not None and tag is not None: + app.abort('Cannot use --branch and --tag together; pick one.') + if branch is None and tag is None: + app.abort('Exactly one of --branch or --tag must be provided.') + + if branch is not None: + if not re.match(BRANCH_PATTERN, branch): + app.abort(f'Invalid branch: {branch!r}. Must match {BRANCH_PATTERN}.') + return Branch(branch) + + assert tag is not None + normalized = tag.removeprefix('v') + if not re.match(TAG_PATTERN, normalized): + app.abort(f'Invalid tag: {tag!r}. Must match {TAG_PATTERN}.') + return Tag(normalized) + + +def local_ref_for(target: ReleaseTarget) -> str: + """The local refname that `fetch_target` populates and that `git show` can read from.""" + if isinstance(target, Branch): + return f'origin/{target.name}' + return target.name + + +def fetch_target(app: Application, target: ReleaseTarget) -> None: + """Fetch the user's release target from origin so `local_ref_for(target)` resolves locally. + + Combines the existence check (would otherwise be a `git ls-remote` probe) with the side + effect of populating the local refs we need to read the workflow files. After this call, + `git show origin/:` or `git show :` is guaranteed to find the + target in the local clone. + """ + if isinstance(target, Branch): + kind = 'branch' + refspec = f'+refs/heads/{target.name}:refs/remotes/origin/{target.name}' + else: + kind = 'tag' + refspec = f'refs/tags/{target.name}:refs/tags/{target.name}' + + app.display_waiting(f'Fetching {kind} `{target.name}` from origin...') + try: + app.repo.git.run('fetch', '--quiet', '--depth=1', 'origin', refspec) + except OSError as e: + msg = str(e).lower() + if any(fragment in msg for fragment in GIT_FETCH_MISSING_FRAGMENTS): + app.abort(f'{kind.capitalize()} `{target.name}` not found on origin.') + else: + app.abort(f'Failed to fetch {kind} `{target.name}` from origin: {e}') + + +def verify_workflows_present_on_ref(app: Application, target: ReleaseTarget) -> None: + """Confirm both workflow files exist at the target ref (which `fetch_target` made local).""" + ref = local_ref_for(target) + kind = 'branch' if isinstance(target, Branch) else 'tag' + + missing: list[str] = [] + for path in WORKFLOW_FILES: + try: + app.repo.git.show_file(path, ref) + except OSError as e: + msg = str(e).lower() + if any(fragment in msg for fragment in GIT_FILE_MISSING_FRAGMENTS): + missing.append(path) + else: + app.abort(f'Failed to read `{path}` from `{ref}`: {e}') + + if missing: + app.abort( + f'{kind.capitalize()} `{target.name}` is missing required workflow file(s): {", ".join(missing)}. ' + 'Pick a newer ref that includes both `test-agent.yml` and `test-agent-windows.yml`.' + ) diff --git a/ddev/src/ddev/cli/terminal.py b/ddev/src/ddev/cli/terminal.py index 4248034a14076..23e29ce773baf 100644 --- a/ddev/src/ddev/cli/terminal.py +++ b/ddev/src/ddev/cli/terminal.py @@ -4,6 +4,7 @@ from __future__ import annotations import os +from collections.abc import Sequence from functools import cached_property from textwrap import indent as indent_text from time import monotonic, sleep @@ -174,6 +175,41 @@ def style_info(self, text: str) -> Text: def style_debug(self, text: str) -> Text: return Text(text, style=self._style_level_debug) + def labeled_text(self, label: str, value: str | Text = '', *, width: int | None = None) -> Text: + """Return a rich label/value line using the terminal's info style for the label.""" + text = Text() + prefix = f'{label}:' + if width is None: + prefix = f'{prefix} ' + else: + prefix = prefix.ljust(width + 2) + + text.append(prefix, style=self._style_level_info) + if isinstance(value, Text): + text.append_text(value) + else: + text.append(value) + return text + + def labeled_lines( + self, + rows: Sequence[tuple[str, str | Text]], + *, + indent: str = '', + align: bool = True, + ) -> Text: + """Return aligned rich label/value lines.""" + text = Text() + width = max((len(label) for label, _ in rows), default=0) if align else None + + for i, (label, value) in enumerate(rows): + if i: + text.append('\n') + text.append(indent) + text.append_text(self.labeled_text(label, value, width=width)) + + return text + def initialize_styles(self, styles: dict): # no cov # Lazily display errors so that they use the correct style errors = [] diff --git a/ddev/src/ddev/utils/docker_registry.py b/ddev/src/ddev/utils/docker_registry.py new file mode 100644 index 0000000000000..b68b133b414ad --- /dev/null +++ b/ddev/src/ddev/utils/docker_registry.py @@ -0,0 +1,83 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Docker Registry v2 read-only helpers. + +These helpers cover anonymous reads against any Docker Registry v2 endpoint +(`registry.datadoghq.com` is the default and serves the Datadog Agent image). +They are intentionally narrow: manifest existence checks and tag listings with +`Link: rel="next"` pagination per RFC 5988. +""" + +from __future__ import annotations + +import httpx + +DEFAULT_REGISTRY_HOST = 'registry.datadoghq.com' + +# Multi-arch (manifest list) is the canonical media type; OCI index and the +# single-arch fallbacks are accepted because the registry may serve any of +# these depending on which origin (GAR vs S3) responds. +MANIFEST_ACCEPT = ', '.join( + [ + 'application/vnd.docker.distribution.manifest.list.v2+json', + 'application/vnd.oci.image.index.v1+json', + 'application/vnd.docker.distribution.manifest.v2+json', + 'application/vnd.oci.image.manifest.v1+json', + ] +) + +# Large default page size keeps the common case to one round trip. Pagination +# is followed regardless, so the value is a performance hint, not a cap. +DEFAULT_PAGE_SIZE = 10000 + + +def manifest_exists( + repository: str, + tag: str, + *, + host: str = DEFAULT_REGISTRY_HOST, + timeout: float = 10.0, +) -> bool: + """Return True if `/v2//manifests/` resolves, False on 404.""" + response = httpx.head( + f'https://{host}/v2/{repository}/manifests/{tag}', + headers={'Accept': MANIFEST_ACCEPT}, + follow_redirects=True, + timeout=timeout, + ) + if response.status_code == 404: + return False + response.raise_for_status() + return True + + +def list_tags( + repository: str, + *, + host: str = DEFAULT_REGISTRY_HOST, + page_size: int = DEFAULT_PAGE_SIZE, + timeout: float = 10.0, +) -> list[str]: + """Return every tag published for `repository`, following `Link: rel="next"` pagination. + + Pagination is parsed via `httpx.Response.links`, which handles RFC 5988 quoting and + multi-link headers correctly. Relative URLs in the next link are resolved against + `host` (registries commonly return paths like `/v2/...?last=...`). + """ + url: str | None = f'https://{host}/v2/{repository}/tags/list?n={page_size}' + tags: list[str] = [] + while url is not None: + response = httpx.get(url, timeout=timeout) + response.raise_for_status() + try: + payload = response.json() + except ValueError as e: + raise httpx.HTTPError(f'Registry returned non-JSON body for tag list: {e}') from e + page = payload.get('tags') or [] + tags.extend(page) + next_url = response.links.get('next', {}).get('url') + if next_url and next_url.startswith('/'): + next_url = f'https://{host}{next_url}' + url = next_url + return tags diff --git a/ddev/src/ddev/utils/github_async/client.py b/ddev/src/ddev/utils/github_async/client.py index 050968ad43ced..9575f726af295 100644 --- a/ddev/src/ddev/utils/github_async/client.py +++ b/ddev/src/ddev/utils/github_async/client.py @@ -12,7 +12,7 @@ from contextlib import asynccontextmanager from dataclasses import dataclass from pathlib import Path -from typing import Any, Literal, Self +from typing import Any, Literal, Self, overload import httpx from pydantic import BaseModel, ConfigDict, Field @@ -162,6 +162,7 @@ def _parse_response[T: BaseModel](response: httpx.Response, model: type[T]) -> G # Endpoint methods # ------------------------------------------------------------------ + @overload async def create_workflow_dispatch( self, owner: str, @@ -170,7 +171,34 @@ async def create_workflow_dispatch( ref: str, inputs: dict[str, str] | None = None, timeout: float | None = None, - ) -> GitHubResponse[WorkflowDispatchResult]: + *, + return_run_details: Literal[True], + ) -> GitHubResponse[WorkflowDispatchResult]: ... + + @overload + async def create_workflow_dispatch( + self, + owner: str, + repo: str, + workflow_id: str | int, + ref: str, + inputs: dict[str, str] | None = None, + timeout: float | None = None, + *, + return_run_details: Literal[False] = False, + ) -> GitHubResponse[None]: ... + + async def create_workflow_dispatch( + self, + owner: str, + repo: str, + workflow_id: str | int, + ref: str, + inputs: dict[str, str] | None = None, + timeout: float | None = None, + *, + return_run_details: bool = False, + ) -> GitHubResponse[WorkflowDispatchResult] | GitHubResponse[None]: """ Calls the GitHub API to trigger a workflow dispatch event. @@ -184,20 +212,29 @@ async def create_workflow_dispatch( ref: Branch or tag name to run the workflow on. inputs: Optional key/value inputs forwarded to the workflow. timeout: Optional timeout for this specific request. Defaults to the client's default_timeout. + return_run_details: When True, requests a 200 response with the new run's metadata + (workflow_run_id, run_url, html_url) instead of the default 204 No Content. + See https://github.blog/changelog/2026-02-19-workflow-dispatch-api-now-returns-run-ids/. Returns: - GitHubResponse[WorkflowDispatchResult]: The dispatched run id and headers. + When ``return_run_details=False`` (default): ``GitHubResponse[None]`` wrapping the 204. + When ``return_run_details=True``: ``GitHubResponse[WorkflowDispatchResult]`` with the new run's + IDs and URLs. """ - body: dict[str, Any] = {"ref": ref, "return_run_details": True} + body: dict[str, Any] = {"ref": ref} if inputs is not None: body["inputs"] = inputs + if return_run_details: + body["return_run_details"] = True response = await self._request( "POST", f"/repos/{owner}/{repo}/actions/workflows/{workflow_id}/dispatches", timeout=timeout, json=body, ) - return self._parse_response(response, WorkflowDispatchResult) + if return_run_details: + return self._parse_response(response, WorkflowDispatchResult) + return GitHubResponse[None].model_validate({"data": None, "headers": dict(response.headers)}) async def get_workflow_run( self, diff --git a/ddev/src/ddev/utils/github_async/models/workflow.py b/ddev/src/ddev/utils/github_async/models/workflow.py index 264dc500889d2..49af424bf6dbf 100644 --- a/ddev/src/ddev/utils/github_async/models/workflow.py +++ b/ddev/src/ddev/utils/github_async/models/workflow.py @@ -23,11 +23,13 @@ class WorkflowRun(BaseModel): class WorkflowDispatchResult(BaseModel): - """Response payload from a successful workflow dispatch.""" + """Run metadata returned by `POST /actions/workflows/{id}/dispatches` when `return_run_details=True`.""" model_config = ConfigDict(extra="ignore") workflow_run_id: int + run_url: str + html_url: str class Artifact(BaseModel): diff --git a/ddev/tests/cli/ci/tests/test_task_test_runner.py b/ddev/tests/cli/ci/tests/test_task_test_runner.py index 8313b88ce6812..7ef13d5a8d2fd 100644 --- a/ddev/tests/cli/ci/tests/test_task_test_runner.py +++ b/ddev/tests/cli/ci/tests/test_task_test_runner.py @@ -186,6 +186,7 @@ async def test_dispatches_workflow_with_job_list_payload(tmp_path: Path) -> None "workflow_id": "test-batch.yaml", "ref": "master", "timeout": None, + "return_run_details": True, "inputs": { "batch_id": "batch-1", "checkout_sha": "merge-sha-bbb", diff --git a/ddev/tests/cli/release/test_agent/__init__.py b/ddev/tests/cli/release/test_agent/__init__.py new file mode 100644 index 0000000000000..75c6647cb9233 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/ddev/tests/cli/release/test_agent/test_command.py b/ddev/tests/cli/release/test_agent/test_command.py new file mode 100644 index 0000000000000..a726efa3b871c --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_command.py @@ -0,0 +1,373 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations + +import httpx +import pytest +from pytest_mock import MockerFixture + +from tests.helpers.github_async import DEFAULT_DISPATCH_HTML_URL, FakeAsyncGitHubClient +from tests.helpers.runner import CliRunner + +EXPECTED_INPUTS = { + 'test-py3': 'true', + 'test-py2': 'false', + 'agent-image': 'registry.datadoghq.com/agent:7.80.0-rc.3', + 'agent-image-windows': 'registry.datadoghq.com/agent:7.80.0-rc.3-servercore', +} + + +@pytest.fixture(autouse=True) +def _silence_git(mocker: MockerFixture) -> None: + """Default git mocks: `fetch_target` succeeds and `git show` returns workflow yaml.""" + mocker.patch('ddev.utils.git.GitRepository.run', return_value=None) + mocker.patch('ddev.utils.git.GitRepository.show_file', return_value='workflow yaml') + + +@pytest.fixture(autouse=True) +def _silence_registry(mocker: MockerFixture) -> None: + """Default registry mocks: every manifest exists and there is one RC tag available.""" + mocker.patch('ddev.cli.release.test_agent.registry.manifest_exists', return_value=True) + mocker.patch( + 'ddev.cli.release.test_agent.registry.list_agent_rc_tags', + return_value=['7.80.0-rc.1', '7.80.0-rc.3'], + ) + + +@pytest.mark.parametrize( + 'args, expected', + [ + pytest.param([], 'Exactly one of --branch or --tag', id='neither'), + pytest.param(['--branch', '7.80.x', '--tag', '7.80.0'], 'Cannot use --branch and --tag together', id='both'), + pytest.param(['--branch', '7.80'], 'Invalid branch', id='bad-branch'), + pytest.param(['--tag', '7.80'], 'Invalid tag', id='bad-tag'), + ], +) +def test_input_validation(ddev: CliRunner, args: list[str], expected: str) -> None: + result = ddev('release', 'test-agent', *args) + assert result.exit_code != 0, result.output + assert expected in result.output + + +def test_tag_with_leading_v_is_accepted(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient) -> None: + result = ddev('release', 'test-agent', '--tag', 'v7.80.0-rc.1', '--yes') + assert result.exit_code == 0, result.output + call = fake_async_github.last_call('create_workflow_dispatch') + assert call.kwargs['inputs']['agent-image'] == 'registry.datadoghq.com/agent:7.80.0-rc.1' + + +@pytest.mark.parametrize('workflow_id', ['test-agent.yml', 'test-agent-windows.yml']) +def test_branch_resolves_latest_rc_dispatches_both( + ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient, workflow_id: str +) -> None: + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code == 0, result.output + fake_async_github.assert_called_with( + 'create_workflow_dispatch', + owner='DataDog', + repo='integrations-core', + workflow_id=workflow_id, + ref='7.80.x', + inputs=EXPECTED_INPUTS, + timeout=None, + return_run_details=True, + ) + + +@pytest.mark.parametrize('workflow_id', ['test-agent.yml', 'test-agent-windows.yml']) +def test_tag_dispatches_both(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient, workflow_id: str) -> None: + """A bare `--tag` (no `v` prefix, no `-rc` suffix) must drive the same dispatch shape as `--branch`.""" + result = ddev('release', 'test-agent', '--tag', '7.80.0', '--yes') + + assert result.exit_code == 0, result.output + fake_async_github.assert_called_with( + 'create_workflow_dispatch', + owner='DataDog', + repo='integrations-core', + workflow_id=workflow_id, + ref='7.80.0', + inputs={ + 'test-py3': 'true', + 'test-py2': 'false', + 'agent-image': 'registry.datadoghq.com/agent:7.80.0', + 'agent-image-windows': 'registry.datadoghq.com/agent:7.80.0-servercore', + }, + timeout=None, + return_run_details=True, + ) + + +def test_dry_run_does_not_dispatch(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient) -> None: + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--dry-run') + + assert result.exit_code == 0, result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + assert 'Dry run' in result.output + + +def test_monitor_invokes_workflow_monitor( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, +) -> None: + monitor = mocker.patch('ddev.cli.release.test_agent.monitoring.monitor_dispatched_workflows', return_value=None) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes', '--monitor') + + assert result.exit_code == 0, result.output + monitor.assert_called_once() + _, token = monitor.call_args.args + assert token == 'ghp_test' + workflows = monitor.call_args.kwargs['workflows'] + assert monitor.call_args.kwargs['poll_interval'] == 10.0 + assert [workflow.label for workflow in workflows] == ['Linux', 'Windows'] + assert [workflow.html_url for workflow in workflows] == [DEFAULT_DISPATCH_HTML_URL, DEFAULT_DISPATCH_HTML_URL] + assert 'Workflows dispatched' not in result.output + assert DEFAULT_DISPATCH_HTML_URL not in result.output + + +def test_monitor_uses_requested_poll_interval( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, +) -> None: + monitor = mocker.patch('ddev.cli.release.test_agent.monitoring.monitor_dispatched_workflows', return_value=None) + + result = ddev( + 'release', + 'test-agent', + '--branch', + '7.80.x', + '--yes', + '--monitor', + '--poll-interval', + '15', + ) + + assert result.exit_code == 0, result.output + assert monitor.call_args.kwargs['poll_interval'] == 15.0 + + +def test_monitor_error_aborts_command( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, +) -> None: + mocker.patch( + 'ddev.cli.release.test_agent.monitoring.monitor_dispatched_workflows', + side_effect=RuntimeError('GitHub API unavailable'), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes', '--monitor') + + assert result.exit_code != 0, result.output + assert 'Failed to monitor workflows: GitHub API unavailable' in result.output + + +def test_branch_with_no_rcs_aborts( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + mocker.patch('ddev.cli.release.test_agent.registry.list_agent_rc_tags', return_value=[]) + + result = ddev('release', 'test-agent', '--branch', '7.99.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'No `7.99.*-rc.*` tags found' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_missing_image_aborts(ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient) -> None: + mocker.patch('ddev.cli.release.test_agent.registry.manifest_exists', return_value=False) + + result = ddev('release', 'test-agent', '--tag', '9.99.0-rc.1', '--yes') + + assert result.exit_code != 0, result.output + assert 'not found in registry.datadoghq.com' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_missing_ref_aborts(ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient) -> None: + """When `git fetch` reports the ref does not exist on origin, surface a clean abort.""" + mocker.patch( + 'ddev.utils.git.GitRepository.run', + side_effect=OSError("fatal: couldn't find remote ref refs/heads/7.80.x"), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Branch `7.80.x` not found on origin' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_fetch_target_is_called_with_branch_refspec( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """The command must fetch the branch into `origin/` before reading workflow files.""" + run_spy = mocker.patch('ddev.utils.git.GitRepository.run', return_value=None) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code == 0, result.output + run_spy.assert_any_call('fetch', '--quiet', '--depth=1', 'origin', '+refs/heads/7.80.x:refs/remotes/origin/7.80.x') + + +def test_fetch_target_is_called_with_tag_refspec( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """A `--tag` invocation must fetch into `refs/tags/` so `git show :path` works.""" + run_spy = mocker.patch('ddev.utils.git.GitRepository.run', return_value=None) + + result = ddev('release', 'test-agent', '--tag', '7.80.0-rc.1', '--yes') + + assert result.exit_code == 0, result.output + run_spy.assert_any_call('fetch', '--quiet', '--depth=1', 'origin', 'refs/tags/7.80.0-rc.1:refs/tags/7.80.0-rc.1') + + +def test_fetch_target_other_error_surfaces_original( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """Anything other than 'remote ref not found' must surface as a generic fetch-failed abort.""" + mocker.patch( + 'ddev.utils.git.GitRepository.run', + side_effect=OSError('fatal: unable to access https://github.com/...: Could not resolve host'), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Failed to fetch branch `7.80.x` from origin' in result.output + assert 'Could not resolve host' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_missing_workflow_file_aborts( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """A 'file exists on disk, but not in ' git error reads as the workflow really being absent.""" + mocker.patch( + 'ddev.utils.git.GitRepository.show_file', + side_effect=OSError( + "fatal: path '.github/workflows/test-agent.yml' exists on disk, but not in 'origin/7.80.x'" + ), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'missing required workflow file' in result.output + assert '7.80.x' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +def test_unknown_git_failure_surfaces_original_error( + ddev: CliRunner, mocker: MockerFixture, fake_async_github: FakeAsyncGitHubClient +) -> None: + """Anything we don't recognise from git must bubble up — never silently look like a missing workflow.""" + mocker.patch( + 'ddev.utils.git.GitRepository.show_file', + side_effect=OSError('fatal: index file corrupt'), + ) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Failed to read' in result.output + assert 'index file corrupt' in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') + + +@pytest.mark.parametrize( + 'failing_workflow, failing_label', + [ + pytest.param('test-agent-windows.yml', 'Windows', id='windows-fails'), + pytest.param('test-agent.yml', 'Linux', id='linux-fails'), + ], +) +def test_partial_dispatch_failure_surfaces_sibling_url( + ddev: CliRunner, + fake_async_github: FakeAsyncGitHubClient, + failing_workflow: str, + failing_label: str, +) -> None: + """When only one dispatch fails, the surviving side's URL must appear in the error message.""" + err = httpx.HTTPStatusError( + 'forbidden', + request=httpx.Request('POST', 'https://api.github.com/'), + response=httpx.Response(403), + ) + fake_async_github.mock_response('create_workflow_dispatch', err, workflow_id=failing_workflow) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert f'{failing_label} dispatch failed' in result.output + assert DEFAULT_DISPATCH_HTML_URL in result.output + + +def test_both_dispatches_fail_combine_messages(ddev: CliRunner, fake_async_github: FakeAsyncGitHubClient) -> None: + """Both-fail must announce itself explicitly and surface both error reprs, not just substrings. + + Asserting on `forbidden` appearing twice catches the previous regression where `add_note` + was used to attach the Windows error — `str(exc)` does not include notes, so the Windows + side was silently dropped from the abort message. + """ + err = httpx.HTTPStatusError( + 'forbidden', + request=httpx.Request('POST', 'https://api.github.com/'), + response=httpx.Response(403), + ) + fake_async_github.mock_response('create_workflow_dispatch', err) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'Both dispatches failed' in result.output + assert 'Linux:' in result.output + assert 'Windows:' in result.output + assert result.output.count('forbidden') == 2 + + +def test_missing_github_token_aborts(ddev: CliRunner, mocker: MockerFixture, config_file) -> None: + """The empty-token guard must abort before any dispatch attempt.""" + config_file.model.github.token = '' + config_file.save() + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert 'GitHub token required' in result.output + + +@pytest.mark.parametrize( + 'mock_target, expected_message', + [ + pytest.param( + 'ddev.cli.release.test_agent.registry.list_agent_rc_tags', + 'Failed to query registry.datadoghq.com for tags', + id='list_tags', + ), + pytest.param( + 'ddev.cli.release.test_agent.registry.manifest_exists', + 'Failed to query registry.datadoghq.com for', + id='manifest_exists', + ), + ], +) +def test_registry_http_error_aborts_cleanly( + ddev: CliRunner, + mocker: MockerFixture, + fake_async_github: FakeAsyncGitHubClient, + mock_target: str, + expected_message: str, +) -> None: + """A transient httpx error from the registry must surface via app.abort, not a raw traceback.""" + mocker.patch(mock_target, side_effect=httpx.ConnectError('connection refused')) + + result = ddev('release', 'test-agent', '--branch', '7.80.x', '--yes') + + assert result.exit_code != 0, result.output + assert expected_message in result.output + fake_async_github.assert_not_called('create_workflow_dispatch') diff --git a/ddev/tests/cli/release/test_agent/test_dispatch.py b/ddev/tests/cli/release/test_agent/test_dispatch.py new file mode 100644 index 0000000000000..f8bc99a0f1401 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_dispatch.py @@ -0,0 +1,94 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Direct unit tests for `dispatch.extract_dispatched_workflows`. + +The CLI-level tests in `test_command.py` only ever raise `httpx.HTTPStatusError` (an +`Exception`) from the dispatch call, so they do not exercise the +`BaseException`-but-not-`Exception` re-raise guard that lets `CancelledError` and +`KeyboardInterrupt` propagate. These tests pin that contract on the helper directly. +""" + +from __future__ import annotations + +import asyncio +from typing import Any + +import pytest + +from ddev.cli.release.test_agent.dispatch import extract_dispatched_workflows +from ddev.utils.github_async import GitHubResponse +from ddev.utils.github_async.models import WorkflowDispatchResult + + +def _ok(run_id: int, html_url: str) -> GitHubResponse[WorkflowDispatchResult]: + return GitHubResponse( + data=WorkflowDispatchResult( + workflow_run_id=run_id, + run_url=f'https://api.github.com/{html_url}', + html_url=html_url, + ), + headers={}, + ) + + +def test_extract_dispatched_workflows_returns_both_runs() -> None: + results = (_ok(1, 'https://github.com/x/runs/1'), _ok(2, 'https://github.com/x/runs/2')) + + linux, windows = extract_dispatched_workflows(results) + assert linux.label == 'Linux' + assert linux.workflow_id == 'test-agent.yml' + assert linux.run_id == 1 + assert linux.html_url == 'https://github.com/x/runs/1' + assert windows.label == 'Windows' + assert windows.workflow_id == 'test-agent-windows.yml' + assert windows.run_id == 2 + assert windows.html_url == 'https://github.com/x/runs/2' + + +@pytest.mark.parametrize( + 'flow_control_exc', + [ + pytest.param(asyncio.CancelledError('user pressed ctrl-c'), id='cancelled-error'), + pytest.param(KeyboardInterrupt(), id='keyboard-interrupt'), + ], +) +@pytest.mark.parametrize( + 'position', + [ + pytest.param('linux', id='linux-cancelled'), + pytest.param('windows', id='windows-cancelled'), + ], +) +def test_extract_dispatched_workflows_reraises_flow_control_exceptions( + flow_control_exc: BaseException, position: str +) -> None: + """`CancelledError` / `KeyboardInterrupt` in either slot must propagate verbatim, not be wrapped.""" + other = _ok(1, 'https://github.com/x/runs/sibling') + results: tuple[Any, Any] = (flow_control_exc, other) if position == 'linux' else (other, flow_control_exc) + + with pytest.raises(type(flow_control_exc)): + extract_dispatched_workflows(results) + + +def test_extract_dispatched_workflows_wraps_regular_exception_failure_as_runtime_error() -> None: + """Sanity check that the `Exception` branch still produces a wrapped RuntimeError.""" + err = RuntimeError('forbidden') + results = (err, _ok(2, 'https://github.com/x/runs/2')) + + with pytest.raises(RuntimeError, match=r'Linux dispatch failed:.*forbidden.*runs/2'): + extract_dispatched_workflows(results) + + +def test_extract_dispatched_workflows_both_failures_includes_both_reprs() -> None: + """Both-fail must surface both error reprs in the RuntimeError message (not via __notes__).""" + linux_err = RuntimeError('linux-side detail') + windows_err = RuntimeError('windows-side detail') + + with pytest.raises(RuntimeError) as excinfo: + extract_dispatched_workflows((linux_err, windows_err)) + + message = str(excinfo.value) + assert 'Both dispatches failed' in message + assert 'linux-side detail' in message + assert 'windows-side detail' in message diff --git a/ddev/tests/cli/release/test_agent/test_monitoring.py b/ddev/tests/cli/release/test_agent/test_monitoring.py new file mode 100644 index 0000000000000..1e65466198af9 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_monitoring.py @@ -0,0 +1,312 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations + +import importlib +from typing import Any + +from ddev.cli.release.test_agent.dispatch import DispatchedWorkflow +from ddev.cli.release.test_agent.monitoring import ( + JobState, + MonitorState, + WorkflowState, + collect_monitor_state, + monitor_workflows, + render_monitor_panel, +) +from ddev.cli.terminal import Terminal +from ddev.utils.github_async import GitHubResponse +from ddev.utils.github_async.models import WorkflowJob, WorkflowJobsList, WorkflowRun +from tests.helpers.github_async import FakeAsyncGitHubClient + +monitoring_module = importlib.import_module('ddev.cli.release.test_agent.monitoring') + + +def test_render_monitor_panel_shows_workflow_summary() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + state = MonitorState( + workflows=[ + WorkflowState( + label='Linux', + run_id=1, + status='in_progress', + conclusion=None, + html_url='https://github.com/DataDog/integrations-core/actions/runs/1', + jobs=[ + JobState( + name='test-agent / py3.12', + status='in_progress', + conclusion=None, + html_url='https://github.com/DataDog/integrations-core/actions/runs/1/job/10', + ) + ], + ) + ] + ) + + with terminal.console.capture() as capture: + terminal.console.print(render_monitor_panel(terminal, ref='7.80.x', poll_interval=30, state=state)) + + output = capture.get() + assert 'Agent test workflow monitor' in output + assert 'Ref: 7.80.x' in output + assert 'Polling: every 30s' in output + assert 'Linux' in output + assert 'Waiting: 0' in output + assert 'Running: 1' in output + assert 'Succeeded: 0' in output + assert 'Failed: 0' in output + assert 'Skipped: 0' in output + + +def test_render_monitor_panel_includes_failed_job_url() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + job_url = 'https://github.com/DataDog/integrations-core/actions/runs/2/job/20' + state = MonitorState( + workflows=[ + WorkflowState( + label='Windows', + run_id=2, + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/2', + jobs=[ + JobState( + name='test-agent-windows / py3.12', + status='completed', + conclusion='failure', + html_url=job_url, + ) + ], + ) + ] + ) + + with terminal.console.capture() as capture: + terminal.console.print(render_monitor_panel(terminal, ref='7.80.x', poll_interval=30, state=state)) + + output = capture.get() + assert 'Windows' in output + assert 'Failures' in output + assert 'test-agent-windows / py3.12' in output + assert 'actions/runs/2/job/' in output + assert '20' in output + + +def test_render_monitor_panel_counts_skipped_jobs_without_listing_them() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + state = MonitorState( + workflows=[ + WorkflowState( + label='Linux', + run_id=1, + status='completed', + conclusion='success', + html_url='https://github.com/DataDog/integrations-core/actions/runs/1', + jobs=[ + JobState('successful job', 'completed', 'success', None), + JobState('skipped job', 'completed', 'skipped', None), + JobState('failed job', 'completed', 'failure', 'https://github.com/job/failed'), + ], + ) + ] + ) + + with terminal.console.capture() as capture: + terminal.console.print(render_monitor_panel(terminal, ref='7.80.x', poll_interval=30, state=state)) + + output = capture.get() + assert 'Skipped: 1' in output + assert 'Failed: 1' in output + assert 'failed job' in output + assert 'skipped job' not in output + assert 'successful job' not in output + + +def test_monitor_state_is_complete_only_when_all_workflows_complete() -> None: + state = MonitorState( + workflows=[ + WorkflowState('Linux', 1, 'completed', 'success', 'https://github.com/runs/1', []), + WorkflowState('Windows', 2, 'in_progress', None, 'https://github.com/runs/2', []), + ] + ) + + assert not state.is_complete + + completed = MonitorState( + workflows=[ + WorkflowState('Linux', 1, 'completed', 'success', 'https://github.com/runs/1', []), + WorkflowState('Windows', 2, 'completed', 'failure', 'https://github.com/runs/2', []), + ] + ) + + assert completed.is_complete + + +def test_dispatched_workflow_has_run_metadata() -> None: + workflow = DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + + assert workflow.run_id == 123 + + +async def test_collect_monitor_state_fetches_workflow_jobs(fake_async_github: FakeAsyncGitHubClient) -> None: + fake_async_github.mock_response( + 'get_workflow_run', + WorkflowRun( + id=123, + name='test-agent', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ), + ) + fake_async_github.mock_response( + 'list_workflow_jobs', + WorkflowJobsList( + total_count=1, + jobs=[ + WorkflowJob( + id=456, + run_id=123, + name='test-agent / py3.12', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123/job/456', + ) + ], + ), + ) + + state = await collect_monitor_state( + fake_async_github, + [ + DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + ], + ) + + assert state.is_complete + assert state.workflows[0].conclusion == 'failure' + assert ( + state.workflows[0].jobs[0].html_url == 'https://github.com/DataDog/integrations-core/actions/runs/123/job/456' + ) + + +async def test_monitor_workflows_does_not_raise_on_job_failure(fake_async_github: FakeAsyncGitHubClient) -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + fake_async_github.mock_response( + 'get_workflow_run', + GitHubResponse( + data=WorkflowRun( + id=123, + name='test-agent', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ), + headers={}, + ), + ) + fake_async_github.mock_response( + 'list_workflow_jobs', + GitHubResponse( + data=WorkflowJobsList( + total_count=1, + jobs=[ + WorkflowJob( + id=456, + run_id=123, + name='test-agent / py3.12', + status='completed', + conclusion='failure', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123/job/456', + ) + ], + ), + headers={}, + ), + ) + + await monitor_workflows( + terminal, + fake_async_github, + ref='7.80.x', + workflows=[ + DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + ], + poll_interval=0, + ) + + +async def test_monitor_workflows_does_not_use_alternate_screen( + fake_async_github: FakeAsyncGitHubClient, monkeypatch +) -> None: + terminal = Terminal(verbosity=0, enable_color=True, interactive=True) + live_kwargs: dict[str, Any] = {} + + class FakeLive: + def __init__(self, *args: Any, **kwargs: Any) -> None: + live_kwargs.update(kwargs) + + def __enter__(self) -> FakeLive: + return self + + def __exit__(self, *args: Any) -> None: + pass + + def update(self, *args: Any, **kwargs: Any) -> None: + pass + + monkeypatch.setattr(monitoring_module, 'Live', FakeLive) + fake_async_github.mock_response( + 'get_workflow_run', + GitHubResponse( + data=WorkflowRun( + id=123, + name='test-agent', + status='completed', + conclusion='success', + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ), + headers={}, + ), + ) + fake_async_github.mock_response( + 'list_workflow_jobs', + GitHubResponse(data=WorkflowJobsList(total_count=0, jobs=[]), headers={}), + ) + + await monitor_workflows( + terminal, + fake_async_github, + ref='7.80.x', + workflows=[ + DispatchedWorkflow( + label='Linux', + workflow_id='test-agent.yml', + run_id=123, + html_url='https://github.com/DataDog/integrations-core/actions/runs/123', + ) + ], + poll_interval=0, + ) + + assert live_kwargs.get('screen') is not True + assert live_kwargs['auto_refresh'] is True + assert live_kwargs['refresh_per_second'] == 10 + assert live_kwargs['vertical_overflow'] == 'crop' diff --git a/ddev/tests/cli/release/test_agent/test_registry.py b/ddev/tests/cli/release/test_agent/test_registry.py new file mode 100644 index 0000000000000..cd9018b2e8283 --- /dev/null +++ b/ddev/tests/cli/release/test_agent/test_registry.py @@ -0,0 +1,90 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Tests for the agent-specific wrapper around `ddev.utils.docker_registry`. + +The registry transport itself is covered by `tests/utils/test_docker_registry.py`. +These tests focus on the RC filter and version sort layered on top. +""" + +from __future__ import annotations + +import httpx +import pytest +from pytest_mock import MockerFixture + +from ddev.cli.release.test_agent.registry import list_agent_rc_tags, manifest_exists + + +def test_manifest_exists_delegates_to_utility(mocker: MockerFixture) -> None: + spy = mocker.patch('ddev.utils.docker_registry.manifest_exists', return_value=True) + + assert manifest_exists('7.80.0-rc.1') is True + spy.assert_called_once_with('agent', '7.80.0-rc.1', timeout=10.0) + + +def test_list_agent_rc_tags_filters_and_sorts(mocker: MockerFixture) -> None: + mocker.patch( + 'ddev.utils.docker_registry.list_tags', + return_value=[ + '7.79.0-rc.1', + '7.80.0-rc.10', + '7.80.0-rc.2', + '7.80.0-rc.1', + '7.80.0', + '7.80.0-rc.1-servercore', + '7-rc', + 'latest', + ], + ) + + assert list_agent_rc_tags(7, 80) == ['7.80.0-rc.1', '7.80.0-rc.2', '7.80.0-rc.10'] + + +def test_list_agent_rc_tags_includes_later_patch_rcs(mocker: MockerFixture) -> None: + mocker.patch( + 'ddev.utils.docker_registry.list_tags', + return_value=[ + '7.79.0-rc.1', + '7.79.0-rc.8', + '7.79.1-rc.1', + '7.79.1-rc.2', + '7.79.0', + '7.79.1-rc.1-servercore', + '7.80.0-rc.1', + ], + ) + + assert list_agent_rc_tags(7, 79) == [ + '7.79.0-rc.1', + '7.79.0-rc.8', + '7.79.1-rc.1', + '7.79.1-rc.2', + ] + + +@pytest.mark.parametrize( + 'all_tags', + [ + pytest.param([], id='empty-list'), + pytest.param(['7.79.0-rc.1', '7.81.0-rc.1', 'latest'], id='no-matching-minor'), + ], +) +def test_list_agent_rc_tags_returns_empty_when_no_match(mocker: MockerFixture, all_tags: list[str]) -> None: + mocker.patch('ddev.utils.docker_registry.list_tags', return_value=all_tags) + + assert list_agent_rc_tags(7, 80) == [] + + +@pytest.mark.parametrize( + 'exc', + [ + pytest.param(httpx.ConnectError('connection refused'), id='connect-error'), + pytest.param(httpx.ReadTimeout('read timed out'), id='read-timeout'), + ], +) +def test_list_agent_rc_tags_propagates_network_errors(mocker: MockerFixture, exc: Exception) -> None: + mocker.patch('ddev.utils.docker_registry.list_tags', side_effect=exc) + + with pytest.raises(type(exc)): + list_agent_rc_tags(7, 80) diff --git a/ddev/tests/cli/test_terminal.py b/ddev/tests/cli/test_terminal.py new file mode 100644 index 0000000000000..5fcbfc51f1da8 --- /dev/null +++ b/ddev/tests/cli/test_terminal.py @@ -0,0 +1,42 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations + +from rich.text import Text + +from ddev.cli.terminal import Terminal + + +def test_labeled_text_formats_label_and_value() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + + text = terminal.labeled_text('Workflows', 'test-agent.yml, test-agent-windows.yml') + + assert text.plain == 'Workflows: test-agent.yml, test-agent-windows.yml' + assert text.spans[0].start == 0 + assert text.spans[0].end == len('Workflows: ') + + +def test_labeled_text_accepts_rich_value() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + value = Text('https://github.com/DataDog/integrations-core/actions/runs/1', style='link https://github.com') + + text = terminal.labeled_text('Linux', value) + + assert text.plain == 'Linux: https://github.com/DataDog/integrations-core/actions/runs/1' + assert any(span.style == 'link https://github.com' for span in text.spans) + + +def test_labeled_lines_aligns_labels() -> None: + terminal = Terminal(verbosity=0, enable_color=False, interactive=False) + + text = terminal.labeled_lines( + [ + ('Ref', '7.80.x'), + ('Resolved RC', '7.80.0-rc.3'), + ], + indent=' ', + ) + + assert text.plain == ' Ref: 7.80.x\n Resolved RC: 7.80.0-rc.3' diff --git a/ddev/tests/helpers/github_async.py b/ddev/tests/helpers/github_async.py index 0ff79c53421e4..28ad713ed6e11 100644 --- a/ddev/tests/helpers/github_async.py +++ b/ddev/tests/helpers/github_async.py @@ -53,6 +53,10 @@ def test_thing(fake_async_github): WorkflowRun, ) +# Stable URL baked into the default `create_workflow_dispatch` response. Exported so tests +# that assert on the URL can reference the helper rather than duplicating the literal. +DEFAULT_DISPATCH_HTML_URL = 'https://github.com/test/repo/actions/runs/1' + @dataclass class RecordedRequest: @@ -88,7 +92,11 @@ def _default_response_factories() -> dict[str, Callable[[], Any]]: response=httpx.Response(404), ), 'create_workflow_dispatch': lambda: GitHubResponse( - data=WorkflowDispatchResult(workflow_run_id=123), + data=WorkflowDispatchResult( + workflow_run_id=123, + run_url='https://api.github.com/repos/test/repo/actions/runs/123', + html_url=DEFAULT_DISPATCH_HTML_URL, + ), headers={}, ), # Default to a completed/successful run so happy-path tests don't have to register one. @@ -260,7 +268,9 @@ async def create_workflow_dispatch( ref: str, inputs: dict[str, str] | None = None, timeout: float | None = None, - ) -> GitHubResponse[WorkflowDispatchResult]: + *, + return_run_details: bool = False, + ) -> GitHubResponse[Any]: return self._call( 'create_workflow_dispatch', owner=owner, @@ -269,6 +279,7 @@ async def create_workflow_dispatch( ref=ref, inputs=inputs, timeout=timeout, + return_run_details=return_run_details, ) async def get_workflow_run( diff --git a/ddev/tests/utils/test_docker_registry.py b/ddev/tests/utils/test_docker_registry.py new file mode 100644 index 0000000000000..810eb79f584f8 --- /dev/null +++ b/ddev/tests/utils/test_docker_registry.py @@ -0,0 +1,169 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""Tests for the generic Docker Registry v2 helpers in `ddev.utils.docker_registry`.""" + +from __future__ import annotations + +import httpx +import pytest +from pytest_mock import MockerFixture + +from ddev.utils import docker_registry + + +def _response( + status: int, + *, + json_body: object | None = None, + headers: dict[str, str] | None = None, + method: str = 'GET', + url: str = 'https://registry.datadoghq.com/v2/agent/tags/list', +) -> httpx.Response: + request = httpx.Request(method, url) + kwargs: dict[str, object] = {'request': request} + if json_body is not None: + kwargs['json'] = json_body + if headers is not None: + kwargs['headers'] = headers + return httpx.Response(status, **kwargs) + + +@pytest.mark.parametrize( + 'status_code, expected', + [ + pytest.param(200, True, id='exists'), + pytest.param(404, False, id='missing'), + ], +) +def test_manifest_exists_resolves_status(mocker: MockerFixture, status_code: int, expected: bool) -> None: + mocker.patch('httpx.head', return_value=_response(status_code, method='HEAD')) + + assert docker_registry.manifest_exists('agent', '7.80.0-rc.1') is expected + + +@pytest.mark.parametrize('status', [401, 403, 500, 503]) +def test_manifest_exists_raises_on_other_errors(mocker: MockerFixture, status: int) -> None: + mocker.patch('httpx.head', return_value=_response(status, method='HEAD')) + + with pytest.raises(httpx.HTTPStatusError): + docker_registry.manifest_exists('agent', '7.80.0-rc.1') + + +@pytest.mark.parametrize( + 'exc', + [ + pytest.param(httpx.ConnectError('connection refused'), id='connect-error'), + pytest.param(httpx.ReadTimeout('read timed out'), id='read-timeout'), + pytest.param(httpx.ConnectTimeout('connect timed out'), id='connect-timeout'), + ], +) +def test_manifest_exists_propagates_network_errors(mocker: MockerFixture, exc: Exception) -> None: + mocker.patch('httpx.head', side_effect=exc) + + with pytest.raises(type(exc)): + docker_registry.manifest_exists('agent', '7.80.0-rc.1') + + +def test_manifest_exists_uses_custom_host(mocker: MockerFixture) -> None: + spy = mocker.patch('httpx.head', return_value=_response(200, method='HEAD')) + + docker_registry.manifest_exists('repo', 'tag', host='private.registry.example') + + spy.assert_called_once() + assert spy.call_args.args[0] == 'https://private.registry.example/v2/repo/manifests/tag' + + +def test_list_tags_returns_single_page(mocker: MockerFixture) -> None: + mocker.patch('httpx.get', return_value=_response(200, json_body={'name': 'agent', 'tags': ['a', 'b', 'c']})) + + assert docker_registry.list_tags('agent') == ['a', 'b', 'c'] + + +@pytest.mark.parametrize( + 'payload', + [ + pytest.param({'name': 'agent', 'tags': []}, id='empty-list'), + pytest.param({'name': 'agent', 'tags': None}, id='null-tags'), + pytest.param({'name': 'agent'}, id='missing-key'), + ], +) +def test_list_tags_handles_missing_tags(mocker: MockerFixture, payload: dict[str, object]) -> None: + mocker.patch('httpx.get', return_value=_response(200, json_body=payload)) + + assert docker_registry.list_tags('agent') == [] + + +def test_list_tags_follows_relative_link_header(mocker: MockerFixture) -> None: + """A `Link: ; rel="next"` header on page 1 must be followed before returning.""" + page_1 = _response( + 200, + json_body={'name': 'agent', 'tags': ['a', 'b']}, + headers={'Link': '; rel="next"'}, + ) + page_2 = _response( + 200, + json_body={'name': 'agent', 'tags': ['c', 'd']}, + ) + get = mocker.patch('httpx.get', side_effect=[page_1, page_2]) + + result = docker_registry.list_tags('agent', page_size=2) + + assert result == ['a', 'b', 'c', 'd'] + assert get.call_count == 2 + assert get.call_args_list[1].args[0] == 'https://registry.datadoghq.com/v2/agent/tags/list?n=2&last=b' + + +def test_list_tags_follows_absolute_link_header(mocker: MockerFixture) -> None: + page_1 = _response( + 200, + json_body={'name': 'agent', 'tags': ['a']}, + headers={'Link': '; rel="next"'}, + ) + page_2 = _response(200, json_body={'name': 'agent', 'tags': ['b']}) + get = mocker.patch('httpx.get', side_effect=[page_1, page_2]) + + result = docker_registry.list_tags('agent') + + assert result == ['a', 'b'] + assert get.call_args_list[1].args[0] == 'https://other.example/v2/agent/tags/list?last=a' + + +def test_list_tags_stops_when_link_header_absent(mocker: MockerFixture) -> None: + """If the final page omits the Link header, iteration ends — no infinite loop.""" + page = _response(200, json_body={'name': 'agent', 'tags': ['only']}) + get = mocker.patch('httpx.get', return_value=page) + + result = docker_registry.list_tags('agent') + + assert result == ['only'] + assert get.call_count == 1 + + +def test_list_tags_ignores_non_next_link_relations(mocker: MockerFixture) -> None: + """Other rels like `last`, `prev`, `up` must not be followed as if they were `next`.""" + page = _response( + 200, + json_body={'name': 'agent', 'tags': ['only']}, + headers={'Link': '; rel="last", ; rel="prev"'}, + ) + get = mocker.patch('httpx.get', return_value=page) + + result = docker_registry.list_tags('agent') + + assert result == ['only'] + assert get.call_count == 1 + + +@pytest.mark.parametrize( + 'exc', + [ + pytest.param(httpx.ConnectError('connection refused'), id='connect-error'), + pytest.param(httpx.ReadTimeout('read timed out'), id='read-timeout'), + ], +) +def test_list_tags_propagates_network_errors(mocker: MockerFixture, exc: Exception) -> None: + mocker.patch('httpx.get', side_effect=exc) + + with pytest.raises(type(exc)): + docker_registry.list_tags('agent') diff --git a/ddev/tests/utils/test_github_async.py b/ddev/tests/utils/test_github_async.py index 259620728afa5..d8a3cc1c8dde8 100644 --- a/ddev/tests/utils/test_github_async.py +++ b/ddev/tests/utils/test_github_async.py @@ -27,6 +27,8 @@ PullRequestRef, PullRequestReviewComment, WorkflowDispatchResult, + WorkflowJob, + WorkflowJobsList, WorkflowRun, ) @@ -72,6 +74,17 @@ def _workflow_run_payload() -> dict[str, Any]: } +def _workflow_job(idx: int, *, status: str = "completed", conclusion: str | None = "success") -> dict[str, Any]: + return { + "id": idx, + "run_id": 42, + "name": f"job-{idx}", + "status": status, + "conclusion": conclusion, + "html_url": f"https://github.com/owner/repo/actions/runs/42/job/{idx}", + } + + def _issue_comment_payload() -> dict[str, Any]: return { "id": 1, @@ -196,10 +209,17 @@ def handler(request: httpx.Request) -> httpx.Response: assert body["ref"] == "main" assert body["return_run_details"] is True assert "inputs" not in body - return _json_response({"workflow_run_id": 999}, headers={"x-ratelimit-remaining": "59"}) + return _json_response( + { + "workflow_run_id": 999, + "run_url": "https://api.github.com/repos/owner/repo/actions/runs/999", + "html_url": "https://github.com/owner/repo/actions/runs/999", + }, + headers={"x-ratelimit-remaining": "59"}, + ) client = _make_client(httpx.MockTransport(handler)) - result = await client.create_workflow_dispatch("owner", "repo", "my-workflow.yml", "main") + result = await client.create_workflow_dispatch("owner", "repo", "my-workflow.yml", "main", return_run_details=True) assert isinstance(result, GitHubResponse) assert isinstance(result.data, WorkflowDispatchResult) assert result.data.workflow_run_id == 999 @@ -211,10 +231,18 @@ def handler(request: httpx.Request) -> httpx.Response: body = json.loads(request.content) assert body["inputs"] == {"env": "prod"} assert body["return_run_details"] is True - return _json_response({"workflow_run_id": 1}) + return _json_response( + { + "workflow_run_id": 1, + "run_url": "https://api.github.com/repos/o/r/actions/runs/1", + "html_url": "https://github.com/o/r/actions/runs/1", + } + ) client = _make_client(httpx.MockTransport(handler)) - result = await client.create_workflow_dispatch("o", "r", 123, "main", inputs={"env": "prod"}) + result = await client.create_workflow_dispatch( + "o", "r", 123, "main", inputs={"env": "prod"}, return_run_details=True + ) assert result.data.workflow_run_id == 1 @@ -225,6 +253,34 @@ async def test_create_workflow_dispatch_http_error_raises() -> None: assert exc_info.value.response.status_code == 422 +async def test_create_workflow_dispatch_return_run_details_parses_response() -> None: + payload = { + "workflow_run_id": 987, + "run_url": "https://api.github.com/repos/o/r/actions/runs/987", + "html_url": "https://github.com/o/r/actions/runs/987", + } + + def handler(request: httpx.Request) -> httpx.Response: + body = json.loads(request.content) + assert body["return_run_details"] is True + return _json_response(payload) + + client = _make_client(httpx.MockTransport(handler)) + result = await client.create_workflow_dispatch("o", "r", "wf.yml", "main", return_run_details=True) + assert result.data.workflow_run_id == 987 + assert result.data.html_url == "https://github.com/o/r/actions/runs/987" + + +async def test_create_workflow_dispatch_omits_return_run_details_when_false() -> None: + def handler(request: httpx.Request) -> httpx.Response: + body = json.loads(request.content) + assert "return_run_details" not in body + return httpx.Response(204) + + client = _make_client(httpx.MockTransport(handler)) + await client.create_workflow_dispatch("o", "r", "wf.yml", "main") + + # --------------------------------------------------------------------------- # get_workflow_run # --------------------------------------------------------------------------- @@ -336,6 +392,62 @@ def handler(request: httpx.Request) -> httpx.Response: pass +# --------------------------------------------------------------------------- +# list_workflow_jobs (pagination) +# --------------------------------------------------------------------------- + + +async def test_list_workflow_jobs_single_page() -> None: + jobs = [_workflow_job(1), _workflow_job(2, status="in_progress", conclusion=None)] + + def handler(request: httpx.Request) -> httpx.Response: + assert request.method == "GET" + assert "/actions/runs/42/jobs" in request.url.path + return _json_response({"total_count": 2, "jobs": jobs}) + + client = _make_client(httpx.MockTransport(handler)) + pages = [] + async for page in client.list_workflow_jobs("owner", "repo", 42): + pages.append(page) + + assert len(pages) == 1 + assert isinstance(pages[0].data, WorkflowJobsList) + assert pages[0].data.total_count == 2 + assert isinstance(pages[0].data.jobs[0], WorkflowJob) + assert pages[0].data.jobs[1].html_url == "https://github.com/owner/repo/actions/runs/42/job/2" + + +async def test_list_workflow_jobs_two_pages() -> None: + call_count = 0 + + def handler(request: httpx.Request) -> httpx.Response: + nonlocal call_count + call_count += 1 + if call_count == 1: + link = f'<{request.url.scheme}://{request.url.host}/page2>; rel="next"' + return _json_response({"total_count": 2, "jobs": [_workflow_job(1)]}, headers={"link": link}) + return _json_response({"total_count": 2, "jobs": [_workflow_job(2)]}) + + client = _make_client(httpx.MockTransport(handler)) + pages = [] + async for page in client.list_workflow_jobs("owner", "repo", 42): + pages.append(page) + + assert len(pages) == 2 + assert pages[0].data.jobs[0].id == 1 + assert pages[1].data.jobs[0].id == 2 + + +async def test_list_workflow_jobs_per_page_forwarded() -> None: + def handler(request: httpx.Request) -> httpx.Response: + assert request.url.params["per_page"] == "100" + return _json_response({"total_count": 0, "jobs": []}) + + client = _make_client(httpx.MockTransport(handler)) + async for _ in client.list_workflow_jobs("owner", "repo", 42, per_page=100): + pass + + # --------------------------------------------------------------------------- # create_issue_comment # ---------------------------------------------------------------------------