diff --git a/.github/workflows/dependency-wheel-promotion.yaml b/.github/workflows/dependency-wheel-promotion.yaml index 5e26cb45bef8a..e5b522f43586e 100644 --- a/.github/workflows/dependency-wheel-promotion.yaml +++ b/.github/workflows/dependency-wheel-promotion.yaml @@ -27,6 +27,25 @@ jobs: - name: Checkout trusted code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Find existing lifecycle comment + id: find_comment + uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4.0.0 + with: + issue-number: ${{ inputs.pr_number }} + body-includes: "" + + - name: Post lifecycle comment (started) + id: started_comment + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0 + with: + issue-number: ${{ inputs.pr_number }} + comment-id: ${{ steps.find_comment.outputs.comment-id }} + edit-mode: replace + body: | + + Wheel promotion started for commit `${{ inputs.head_sha }}` by @${{ github.actor }}. + Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + - name: Checkout PR lockfiles only uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -62,41 +81,57 @@ jobs: - name: Set dependency-wheel-promotion status to success uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + HEAD_SHA: ${{ inputs.head_sha }} with: script: | await github.rest.repos.createCommitStatus({ owner: context.repo.owner, repo: context.repo.repo, - sha: '${{ inputs.head_sha }}', + sha: process.env.HEAD_SHA, state: 'success', context: 'dependency-wheel-promotion', description: 'Wheels promoted to stable storage.', target_url: `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`, }); - - name: Post success comment - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + - name: Update lifecycle comment (success) + uses: 
peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0 with: - script: | - const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: ${{ inputs.pr_number }}, - body: `Wheels promoted to stable storage for commit ${{ inputs.head_sha }} by @${context.actor}. [Workflow run](${runUrl}).`, - }); + issue-number: ${{ inputs.pr_number }} + comment-id: ${{ steps.started_comment.outputs.comment-id }} + edit-mode: replace + body: | + + Wheels promoted to stable storage for commit `${{ inputs.head_sha }}` by @${{ github.actor }}. + Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - name: Set dependency-wheel-promotion status to error if: failure() uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + HEAD_SHA: ${{ inputs.head_sha }} with: script: | await github.rest.repos.createCommitStatus({ owner: context.repo.owner, repo: context.repo.repo, - sha: '${{ inputs.head_sha }}', + sha: process.env.HEAD_SHA, state: 'error', context: 'dependency-wheel-promotion', description: 'Wheel promotion failed. Check the Actions tab for details.', target_url: `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`, }); + + - name: Update lifecycle comment (failure) + if: failure() + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0 + with: + issue-number: ${{ inputs.pr_number }} + comment-id: ${{ steps.started_comment.outputs.comment-id }} + edit-mode: replace + body: | + + Wheel promotion failed for commit `${{ inputs.head_sha }}` by @${{ github.actor }}. + Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + Check the workflow logs before retrying. 
diff --git a/.github/workflows/master-windows.yml b/.github/workflows/master-windows.yml index 7f858efd91012..e1cdd3c037d71 100644 --- a/.github/workflows/master-windows.yml +++ b/.github/workflows/master-windows.yml @@ -102,7 +102,7 @@ jobs: - name: Upload coverage to Datadog if: always() continue-on-error: true - uses: DataDog/coverage-upload-github-action@9bbbf86d16f7db1b14c5b885e61cf0d96053686a # v1.0.0 + uses: DataDog/coverage-upload-github-action@6c4bd935248daa6f0ef94e3e6ba71ad5ad079998 # v1.0.3 with: api_key: ${{ secrets.DD_API_KEY }} files: coverage-reports diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7fcc2f903aaf0..45786b66173ae 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -3,27 +3,27 @@ name: Master on: push: branches: - - master + - master paths: # List of files/paths that should trigger the run. The intention is to avoid running all tests if the commit only includes changes on assets or README - - '*/datadog_checks/**' - - '*/tests/**' - - 'ddev/**' - - 'datadog_checks_base/**' - - 'datadog_checks_dev/**' - # Contains overrides for testing - - '.ddev/**' - # Want to ensure any change in workflows is validated - - '.github/workflows/**' - # Test matrices and dependencies - - '*/hatch.toml' - - '*/pyproject.toml' - # Some integrations might use this file to validate metrics emission - - '*/metadata.csv' - # In case some linting formatting config has changed - - 'pyproject.toml' + - "*/datadog_checks/**" + - "*/tests/**" + - "ddev/**" + - "datadog_checks_base/**" + - "datadog_checks_dev/**" + # Contains overrides for testing + - ".ddev/**" + # Want to ensure any change in workflows is validated + - ".github/workflows/**" + # Test matrices and dependencies + - "*/hatch.toml" + - "*/pyproject.toml" + # Some integrations might use this file to validate metrics emission + - "*/metadata.csv" + # In case some linting formatting config has changed + - "pyproject.toml" schedule: - - cron: '0 2 * 
* *' + - cron: "0 2 * * *" jobs: cache: @@ -31,7 +31,7 @@ jobs: test: needs: - - cache + - cache uses: ./.github/workflows/test-all.yml with: @@ -48,12 +48,12 @@ jobs: secrets: inherit permissions: - # needed for compute-matrix in test-target.yml - contents: read + # needed for compute-matrix in test-target.yml + contents: read publish-test-results: needs: - - test + - test if: success() || failure() concurrency: @@ -69,7 +69,7 @@ jobs: upload-coverage: needs: - - test + - test if: > !github.event.repository.private && (success() || failure()) @@ -80,27 +80,27 @@ jobs: contents: read steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Download all coverage artifacts - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 - with: - pattern: coverage-* - path: coverage-reports - merge-multiple: false + - name: Download all coverage artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + with: + pattern: coverage-* + path: coverage-reports + merge-multiple: false - - name: Upload coverage to Codecov - uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de - with: - use_oidc: true - directory: coverage-reports - fail_ci_if_error: false + - name: Upload coverage to Codecov + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de + with: + use_oidc: true + directory: coverage-reports + fail_ci_if_error: false - - name: Upload coverage to Datadog - if: always() - continue-on-error: true - uses: DataDog/coverage-upload-github-action@9bbbf86d16f7db1b14c5b885e61cf0d96053686a # v1.0.0 - with: - api_key: ${{ secrets.DD_API_KEY }} - files: coverage-reports - format: cobertura + - name: Upload coverage to Datadog + if: always() + continue-on-error: true + uses: DataDog/coverage-upload-github-action@6c4bd935248daa6f0ef94e3e6ba71ad5ad079998 # v1.0.3 + with: + api_key: 
${{ secrets.DD_API_KEY }} + files: coverage-reports + format: cobertura diff --git a/.github/workflows/pr-all-windows.yml b/.github/workflows/pr-all-windows.yml index 8f1d9c0e34268..a3a5c824a1d7c 100644 --- a/.github/workflows/pr-all-windows.yml +++ b/.github/workflows/pr-all-windows.yml @@ -5,17 +5,17 @@ name: PR All Windows on: pull_request: paths: - - datadog_checks_base/datadog_checks/** - - datadog_checks_dev/datadog_checks/dev/*.py - - ddev/src/** - - "!agent_requirements.in" - # Also run if we modify the workflow files - - '.github/workflows/pr-all-windows.yml' - - '.github/workflows/test-target.yml' - - '.github/workflows/test-all-windows.yml' - # Also run in the action to install test-target scripts changes - - '.github/actions/setup-test-target-scripts/**' - - '.github/actions/setup-ddev/**' + - datadog_checks_base/datadog_checks/** + - datadog_checks_dev/datadog_checks/dev/*.py + - ddev/src/** + - "!agent_requirements.in" + # Also run if we modify the workflow files + - ".github/workflows/pr-all-windows.yml" + - ".github/workflows/test-target.yml" + - ".github/workflows/test-all-windows.yml" + # Also run in the action to install test-target scripts changes + - ".github/actions/setup-test-target-scripts/**" + - ".github/actions/setup-ddev/**" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref }} @@ -26,8 +26,8 @@ jobs: uses: ./.github/workflows/test-all-windows.yml permissions: - # needed for compute-matrix in test-target.yml - contents: read + # needed for compute-matrix in test-target.yml + contents: read with: repo: core @@ -39,14 +39,14 @@ jobs: save-event: needs: - - test + - test if: success() || failure() uses: ./.github/workflows/save-event.yml upload-coverage: needs: - - test + - test if: > !github.event.repository.private && (success() || failure()) @@ -57,27 +57,27 @@ jobs: contents: read steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Download all coverage artifacts - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 - with: - pattern: coverage-* - path: coverage-reports - merge-multiple: false + - name: Download all coverage artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + with: + pattern: coverage-* + path: coverage-reports + merge-multiple: false - - name: Upload coverage to Codecov - uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de - with: - use_oidc: true - directory: coverage-reports - fail_ci_if_error: false + - name: Upload coverage to Codecov + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de + with: + use_oidc: true + directory: coverage-reports + fail_ci_if_error: false - - name: Upload coverage to Datadog - if: always() - continue-on-error: true - uses: DataDog/coverage-upload-github-action@9bbbf86d16f7db1b14c5b885e61cf0d96053686a # v1.0.0 - with: - api_key: ${{ secrets.DD_API_KEY }} - files: coverage-reports - format: cobertura + - name: Upload coverage to Datadog + if: always() + continue-on-error: true + uses: DataDog/coverage-upload-github-action@6c4bd935248daa6f0ef94e3e6ba71ad5ad079998 # v1.0.3 + with: + api_key: ${{ secrets.DD_API_KEY }} + files: coverage-reports + format: cobertura diff --git a/.github/workflows/pr-all.yml b/.github/workflows/pr-all.yml index 9ea6dce99667e..fb9fe8ca4cb30 100644 --- a/.github/workflows/pr-all.yml +++ b/.github/workflows/pr-all.yml @@ -3,20 +3,20 @@ name: PR All on: pull_request: paths: - - datadog_checks_base/datadog_checks/** - - datadog_checks_base/pyproject.toml - - datadog_checks_dev/datadog_checks/dev/*.py - - datadog_checks_dev/pyproject.toml - - ddev/src/** - - ddev/pyproject.toml - - "!agent_requirements.in" - # Also run if we modify the workflow files - - '.github/workflows/pr-all.yml' - - '.github/workflows/test-target.yml' - - 
'.github/workflows/test-all.yml' - # Also run if the action to install test-target scripts changes - - '.github/actions/setup-test-target-scripts/**' - - '.github/actions/setup-ddev/**' + - datadog_checks_base/datadog_checks/** + - datadog_checks_base/pyproject.toml + - datadog_checks_dev/datadog_checks/dev/*.py + - datadog_checks_dev/pyproject.toml + - ddev/src/** + - ddev/pyproject.toml + - "!agent_requirements.in" + # Also run if we modify the workflow files + - ".github/workflows/pr-all.yml" + - ".github/workflows/test-target.yml" + - ".github/workflows/test-all.yml" + # Also run if the action to install test-target scripts changes + - ".github/actions/setup-test-target-scripts/**" + - ".github/actions/setup-ddev/**" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref }} @@ -27,8 +27,8 @@ jobs: uses: ./.github/workflows/test-all.yml permissions: - # needed for compute-matrix in test-target.yml - contents: read + # needed for compute-matrix in test-target.yml + contents: read with: repo: core @@ -42,14 +42,14 @@ jobs: save-event: needs: - - test + - test if: success() || failure() uses: ./.github/workflows/save-event.yml upload-coverage: needs: - - test + - test if: > !github.event.repository.private && (success() || failure()) @@ -60,27 +60,27 @@ jobs: contents: read steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Download all coverage artifacts - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 - with: - pattern: coverage-* - path: coverage-reports - merge-multiple: false + - name: Download all coverage artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + with: + pattern: coverage-* + path: coverage-reports + merge-multiple: false - - name: Upload coverage to Codecov - uses: 
codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de - with: - use_oidc: true - directory: coverage-reports - fail_ci_if_error: false + - name: Upload coverage to Codecov + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de + with: + use_oidc: true + directory: coverage-reports + fail_ci_if_error: false - - name: Upload coverage to Datadog - if: always() - continue-on-error: true - uses: DataDog/coverage-upload-github-action@9bbbf86d16f7db1b14c5b885e61cf0d96053686a # v1.0.0 - with: - api_key: ${{ secrets.DD_API_KEY }} - files: coverage-reports - format: cobertura + - name: Upload coverage to Datadog + if: always() + continue-on-error: true + uses: DataDog/coverage-upload-github-action@6c4bd935248daa6f0ef94e3e6ba71ad5ad079998 # v1.0.3 + with: + api_key: ${{ secrets.DD_API_KEY }} + files: coverage-reports + format: cobertura diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 8f3a14ece990e..824471469d4dc 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -33,7 +33,7 @@ jobs: test: needs: - - compute-matrix + - compute-matrix if: needs.compute-matrix.outputs.matrix != '[]' && github.event_name != 'merge_group' strategy: fail-fast: false @@ -64,7 +64,7 @@ jobs: test-minimum-base-package: needs: - - compute-matrix + - compute-matrix if: needs.compute-matrix.outputs.matrix != '[]' && github.event_name != 'merge_group' strategy: fail-fast: false @@ -96,16 +96,16 @@ jobs: save-event: needs: - - test - - test-minimum-base-package + - test + - test-minimum-base-package if: success() || failure() uses: ./.github/workflows/save-event.yml upload-coverage: needs: - - test - - test-minimum-base-package + - test + - test-minimum-base-package if: > !github.event.repository.private && (success() || failure()) && @@ -117,35 +117,35 @@ jobs: contents: read steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: Download all coverage artifacts - uses: 
actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 - with: - pattern: coverage-* - path: coverage-reports - merge-multiple: false - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de - with: - use_oidc: true - directory: coverage-reports - fail_ci_if_error: false - - - name: Upload coverage to Datadog - if: always() - continue-on-error: true - uses: DataDog/coverage-upload-github-action@9bbbf86d16f7db1b14c5b885e61cf0d96053686a # v1.0.0 - with: - api_key: ${{ secrets.DD_API_KEY }} - files: coverage-reports - format: cobertura + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Download all coverage artifacts + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + with: + pattern: coverage-* + path: coverage-reports + merge-multiple: false + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de + with: + use_oidc: true + directory: coverage-reports + fail_ci_if_error: false + + - name: Upload coverage to Datadog + if: always() + continue-on-error: true + uses: DataDog/coverage-upload-github-action@6c4bd935248daa6f0ef94e3e6ba71ad5ad079998 # v1.0.3 + with: + api_key: ${{ secrets.DD_API_KEY }} + files: coverage-reports + format: cobertura check: needs: - - test - - test-minimum-base-package + - test + - test-minimum-base-package # In integrations-core and integrations-extras repos the tests are flaky enough that # it would be a pain to merge PRs with the Merge Queue enabled. # While we work on the tests, we skip the job if it's triggered by Merge Queue. 
@@ -154,8 +154,8 @@ jobs: runs-on: ubuntu-latest steps: - - name: Check status of required jobs - uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # v1.2.2 - with: - jobs: ${{ toJSON(needs) }} - allowed-skips: test, test-minimum-base-package + - name: Check status of required jobs + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe # v1.2.2 + with: + jobs: ${{ toJSON(needs) }} + allowed-skips: test, test-minimum-base-package diff --git a/.github/workflows/test-fips-e2e.yml b/.github/workflows/test-fips-e2e.yml index 1035573ea9424..23ae8619da71f 100644 --- a/.github/workflows/test-fips-e2e.yml +++ b/.github/workflows/test-fips-e2e.yml @@ -17,10 +17,10 @@ on: type: string pull_request: paths: - - datadog_checks_base/datadog_checks/** - - datadog_checks_base/pyproject.toml + - datadog_checks_base/datadog_checks/** + - datadog_checks_base/pyproject.toml schedule: - - cron: '0 0,8,16 * * *' + - cron: "0 0,8,16 * * *" defaults: run: @@ -43,103 +43,102 @@ jobs: DD_TRACE_ANALYTICS_ENABLED: "true" permissions: - # needed for dd-sts and codecov in test-target.yml, allows the action to get a JWT signed by Github - id-token: write - # needed for compute-matrix in test-target.yml - contents: read + # needed for dd-sts and codecov in test-target.yml, allows the action to get a JWT signed by Github + id-token: write + # needed for compute-matrix in test-target.yml + contents: read steps: - - - name: Set environment variables with sanitized paths - run: | - JOB_NAME="test-fips-e2e" - - echo "TEST_RESULTS_DIR=$TEST_RESULTS_BASE_DIR/$JOB_NAME" >> $GITHUB_ENV - echo "TRACE_CAPTURE_FILE=$TRACE_CAPTURE_BASE_DIR/$JOB_NAME" >> $GITHUB_ENV - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: "${{ env.PYTHON_VERSION }}" - - - name: Get Datadog credentials - id: dd-sts - 
uses: DataDog/dd-sts-action@2e8187910199bd93129520183c093e19aa585c75 # v1.0.0 - with: - policy: integrations-core-api-key - - - name: Install ddev from local folder - uses: ./.github/actions/setup-ddev - with: - install-mode: local - cache-profile: local-ddev-base - - - name: Configure ddev - run: |- - ddev config set upgrade_check false - ddev config set repos.core . - ddev config set repo core - - - name: Prepare for testing - env: - PYTHONUNBUFFERED: "1" - DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} - DOCKER_ACCESS_TOKEN: ${{ secrets.DOCKER_ACCESS_TOKEN }} - ORACLE_DOCKER_USERNAME: ${{ secrets.ORACLE_DOCKER_USERNAME }} - ORACLE_DOCKER_PASSWORD: ${{ secrets.ORACLE_DOCKER_PASSWORD }} - DD_GITHUB_USER: ${{ github.actor }} - DD_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: ddev ci setup ${{ inputs.target || 'tls' }} - - - name: Run E2E tests with FIPS disabled - env: - DDEV_E2E_AGENT: "${{ inputs.agent-image || 'registry.datadoghq.com/agent-dev:master-py3' }}" - DD_API_KEY: "${{ steps.dd-sts.outputs.api_key }}" - run: | - ddev env test --base --new-env --junit ${{ inputs.target || 'tls' }} -- all -m "fips_off" - - - name: Run E2E tests with FIPS enabled - env: - DDEV_E2E_AGENT: "${{ inputs.agent-image-fips || 'registry.datadoghq.com/agent-dev:master-fips' }}" - DD_API_KEY: "${{ steps.dd-sts.outputs.api_key }}" - run: | - ddev env test --base --new-env --junit ${{ inputs.target || 'tls' }} -- all -k "fips_on" - - - name: Finalize test results - if: always() - run: |- - mkdir -p "${{ env.TEST_RESULTS_DIR }}" - if [[ -d ${{ inputs.target || 'tls' }}/junit ]]; then - mv ${{ inputs.target || 'tls' }}/junit/*.xml "${{ env.TEST_RESULTS_DIR }}" - fi - - - name: Upload test results - if: always() - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 - with: - name: "test-results-${{ inputs.target || 'tls' }}" - path: "${{ env.TEST_RESULTS_BASE_DIR }}" - - - name: Upload coverage data - if: > - !github.event.repository.private && - always() 
- uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de - with: - use_oidc: true - files: "${{ inputs.target || 'tls' }}/coverage.xml" - flags: "${{ inputs.target || 'tls' }}" - - - name: Upload coverage to Datadog - if: > - !github.event.repository.private && - always() - continue-on-error: true - uses: DataDog/coverage-upload-github-action@9bbbf86d16f7db1b14c5b885e61cf0d96053686a # v1.0.0 - with: - api_key: ${{ secrets.DD_API_KEY }} - files: "${{ inputs.target || 'tls' }}/coverage.xml" - format: cobertura - flags: "${{ inputs.target || 'tls' }}" + - name: Set environment variables with sanitized paths + run: | + JOB_NAME="test-fips-e2e" + + echo "TEST_RESULTS_DIR=$TEST_RESULTS_BASE_DIR/$JOB_NAME" >> $GITHUB_ENV + echo "TRACE_CAPTURE_FILE=$TRACE_CAPTURE_BASE_DIR/$JOB_NAME" >> $GITHUB_ENV + + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "${{ env.PYTHON_VERSION }}" + + - name: Get Datadog credentials + id: dd-sts + uses: DataDog/dd-sts-action@2e8187910199bd93129520183c093e19aa585c75 # v1.0.0 + with: + policy: integrations-core-api-key + + - name: Install ddev from local folder + uses: ./.github/actions/setup-ddev + with: + install-mode: local + cache-profile: local-ddev-base + + - name: Configure ddev + run: |- + ddev config set upgrade_check false + ddev config set repos.core . 
+ ddev config set repo core + + - name: Prepare for testing + env: + PYTHONUNBUFFERED: "1" + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_ACCESS_TOKEN: ${{ secrets.DOCKER_ACCESS_TOKEN }} + ORACLE_DOCKER_USERNAME: ${{ secrets.ORACLE_DOCKER_USERNAME }} + ORACLE_DOCKER_PASSWORD: ${{ secrets.ORACLE_DOCKER_PASSWORD }} + DD_GITHUB_USER: ${{ github.actor }} + DD_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: ddev ci setup ${{ inputs.target || 'tls' }} + + - name: Run E2E tests with FIPS disabled + env: + DDEV_E2E_AGENT: "${{ inputs.agent-image || 'registry.datadoghq.com/agent-dev:master-py3' }}" + DD_API_KEY: "${{ steps.dd-sts.outputs.api_key }}" + run: | + ddev env test --base --new-env --junit ${{ inputs.target || 'tls' }} -- all -m "fips_off" + + - name: Run E2E tests with FIPS enabled + env: + DDEV_E2E_AGENT: "${{ inputs.agent-image-fips || 'registry.datadoghq.com/agent-dev:master-fips' }}" + DD_API_KEY: "${{ steps.dd-sts.outputs.api_key }}" + run: | + ddev env test --base --new-env --junit ${{ inputs.target || 'tls' }} -- all -k "fips_on" + + - name: Finalize test results + if: always() + run: |- + mkdir -p "${{ env.TEST_RESULTS_DIR }}" + if [[ -d ${{ inputs.target || 'tls' }}/junit ]]; then + mv ${{ inputs.target || 'tls' }}/junit/*.xml "${{ env.TEST_RESULTS_DIR }}" + fi + + - name: Upload test results + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: "test-results-${{ inputs.target || 'tls' }}" + path: "${{ env.TEST_RESULTS_BASE_DIR }}" + + - name: Upload coverage data + if: > + !github.event.repository.private && + always() + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de + with: + use_oidc: true + files: "${{ inputs.target || 'tls' }}/coverage.xml" + flags: "${{ inputs.target || 'tls' }}" + + - name: Upload coverage to Datadog + if: > + !github.event.repository.private && + always() + continue-on-error: true + uses: 
DataDog/coverage-upload-github-action@6c4bd935248daa6f0ef94e3e6ba71ad5ad079998 # v1.0.3 + with: + api_key: ${{ secrets.DD_API_KEY }} + files: "${{ inputs.target || 'tls' }}/coverage.xml" + format: cobertura + flags: "${{ inputs.target || 'tls' }}" diff --git a/datadog_checks_downloader/changelog.d/23144.added b/datadog_checks_downloader/changelog.d/23144.added new file mode 100644 index 0000000000000..2b3e21333eff5 --- /dev/null +++ b/datadog_checks_downloader/changelog.d/23144.added @@ -0,0 +1 @@ +Add v2 TUF pointer downloader support. diff --git a/datadog_checks_downloader/datadog_checks/downloader/cli.py b/datadog_checks_downloader/datadog_checks/downloader/cli.py index be3776c3d3682..8cdd29f44af20 100644 --- a/datadog_checks_downloader/datadog_checks/downloader/cli.py +++ b/datadog_checks_downloader/datadog_checks/downloader/cli.py @@ -2,16 +2,30 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations # 1st party. import argparse +import logging import os import re import sys +import urllib.error # 2nd party. +from tuf.api.exceptions import DownloadError + from .download import DEFAULT_ROOT_LAYOUT_TYPE, REPOSITORY_URL_PREFIX, ROOT_LAYOUTS, TUFDownloader -from .exceptions import NonCanonicalVersion, NonDatadogPackage +from .download_v2 import V2_REPOSITORY_URL, TUFPointerDownloader +from .exceptions import CLIError, MissingVersion, NonCanonicalVersion, NonDatadogPackage, TargetNotFoundError + +V2_FALLBACK_ERRORS: tuple[type[BaseException], ...] = ( + MissingVersion, + TargetNotFoundError, + DownloadError, + TimeoutError, + urllib.error.URLError, +) # Private module functions. 
@@ -25,6 +39,14 @@ def __is_canonical(version): return re.match(P, version) is not None +def _v2_failure_category(exc: Exception) -> str: + if isinstance(exc, TargetNotFoundError): + return 'target version not found' + if isinstance(exc, (DownloadError, TimeoutError, urllib.error.URLError)): + return 'network error' + return 'other' + + def __find_shipped_integrations(): # Recurse up from site-packages until we find the Agent root directory. # The relative path differs between operating systems. @@ -142,6 +164,88 @@ def run_downloader(tuf_downloader, standard_distribution_name, version, ignore_p # Public module functions. -def download(): - tuf_downloader, standard_distribution_name, version, ignore_python_version = instantiate_downloader() - run_downloader(tuf_downloader, standard_distribution_name, version, ignore_python_version) +def download() -> None: + downloader, name, version, args = instantiate_v2_downloader() + + if args.v2: + warn_v2_ignored_args(args) + run_v2_downloader(downloader, name, version) + return + + try: + run_v2_downloader(downloader, name, version) + except V2_FALLBACK_ERRORS as exc: + # Integrity failures (DigestMismatch / LengthMismatch / MalformedPointerError) are + # intentionally not in V2_FALLBACK_ERRORS — they must propagate, not be masked by v1. + logging.getLogger(__name__).info( + 'v2 download failed (%s, %s: %s), falling back to v1', + _v2_failure_category(exc), + type(exc).__name__, + exc, + ) + run_downloader(*instantiate_downloader()) + except CLIError: + # NonDatadogPackage and NonCanonicalVersion: v1 would raise the same. + raise + + +def _v2_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + + parser.add_argument( + 'standard_distribution_name', + type=str, + help='Standard distribution name of the desired Datadog check, e.g. datadog-postgres.', + ) + parser.add_argument( + '--repository', type=str, default=V2_REPOSITORY_URL, help='HTTPS base URL of the v2 TUF repository.'
+ ) + parser.add_argument('--version', type=str, default=None, help='Version to download (default: latest stable).') + parser.add_argument( + '--unsafe-disable-verification', + action='store_true', + help='Disable TUF verification and wheel digest checks; requires --version and downloads /wheels directly.', + ) + parser.add_argument('-v', '--verbose', action='count', default=0) + parser.add_argument('--v2', action='store_true', default=False) + + # v1 compat flags accepted as no-ops so callers upgrading from v1 get a warning, not an error. + parser.add_argument('--type', type=str, default=None, dest='ignored_type') + parser.add_argument('--ignore-python-version', action='store_true', dest='ignored_ignore_python_version') + parser.add_argument('--force', action='store_true', dest='ignored_force') + + return parser + + +def warn_v2_ignored_args(args: argparse.Namespace) -> None: + if args.ignored_type is not None: + sys.stderr.write('WARNING: --type is not applicable with --v2 and will be ignored.\n') + if args.ignored_ignore_python_version: + sys.stderr.write( + 'NOTE: --ignore-python-version is not applicable with --v2 (wheel selection happens at publish time).\n' + ) + + +def instantiate_v2_downloader() -> tuple[TUFPointerDownloader, str, str | None, argparse.Namespace]: + args = _v2_parser().parse_args() + + if not args.standard_distribution_name.startswith('datadog-'): + raise NonDatadogPackage(args.standard_distribution_name) + + if args.version and not __is_canonical(args.version): + raise NonCanonicalVersion(args.version) + + remainder = min(args.verbose, 5) % 6 + level = (6 - remainder) * 10 + logging.basicConfig(format='%(levelname)-8s: %(message)s', level=level) + + downloader = TUFPointerDownloader( + repository_url=args.repository, + disable_verification=args.unsafe_disable_verification, + ) + return downloader, args.standard_distribution_name, args.version, args + + +def run_v2_downloader(downloader: TUFPointerDownloader, name: str, version: str | None) 
-> None: + wheel_path = downloader.download(name, version=version) + print(wheel_path) # pylint: disable=print-statement diff --git a/datadog_checks_downloader/datadog_checks/downloader/data/v2/metadata/root.json b/datadog_checks_downloader/datadog_checks/downloader/data/v2/metadata/root.json new file mode 100644 index 0000000000000..e053044657c99 --- /dev/null +++ b/datadog_checks_downloader/datadog_checks/downloader/data/v2/metadata/root.json @@ -0,0 +1,191 @@ +{ + "signatures": [ + { + "keyid": "ac5d650bc9aa17fdad54753fbf64e083f6f613286d0feef991ff61ec26874f2b", + "sig": "3066023100beb16cde4c9e17c725713c6020cb5b11a65dd60a7b46f6842068815593e8f39cfa547ea89b6169474a8ee6a98c22f934023100f215434d25181f6f0d6a75a1bae4f09814678d3409cc0aaf0f8e1cb2b0a7bcb29acd5394b4df1751f7e73c641a17256b" + }, + { + "keyid": "6f0f52eb4cb14d590aafd5f7eb8d9a79477ac89794be2d3caade9fc39b3735e6", + "sig": "306502306130792e890c5257cc8fc951e7c9a4009a5552affbd6bdf5cef3bac647de18f82918adbd4edfaa6d1624e2c378ee0ca4023100db6cff59e145b0113560116eac4466e52fc24e3ec3f02ee39fe7213718c2184d58e5473a5f29429b1a4c63e7ff87b83b" + }, + { + "keyid": "2d019dcc7a3e8da4d22bf364f0e0cb87937b2ced68339f3c53d305d1a9aadcce", + "sig": "3066023100ed93223b4ab9784c00b73937cd431fe9d6af8906548124a10215ad93432523476a3265e712fe99b7555ea30ce5aeff30023100e55ae85f68da6ac322f912cae2a28facb6b3f014770d348a7c9daee100e5eb8ae78cc86321b20711f3b357d900a075a4" + }, + { + "keyid": "e942404daa3e8cb1143ab5f275df2f8c741ae002194147806bd6f05b8e2e816f", + "sig": "3066023100a1a75a85dbe43db459e3d8c1bd935f2717bae0b1cba79ea5b9a5e785b7eb08cc30e08f96ba5fc0ccb9b9c97b9af456450231008dcc197a5de9a93649dfbd27e3f112321441913138c3377487ae85353a982cbffdc88029d681e432e86cfc14c51196ab" + }, + { + "keyid": "1286a08794005a5f1d679e56322f45fd3b55aa198f87bdc699f8213048602000", + "sig": 
"3064023075188913725a1c2e9af59f8663b6a178156b64d87da126a5970a3b6a3399bdfe7f5c357099f2f1a4e83d52294551c41e0230080ef2ff77b7d558879cbe0eda409c3ba2fe080860506d4f2ede314374e39dc0b2bc466af51fdd258eb76171344d42af" + }, + { + "keyid": "65ccb05ff16285a3b65ea2db2581ed083bb19acfcbd130d5484c151baf28541f", + "sig": "306602310086b9d6f39f795ad188223318f02e1d78b5798d34e333c1933e55891cc1b11cd6771b2d9ab1f5c1fc707d4815e3cc200d023100eb63f35f7cc2d0166357f2c209ecca63b82fc6bc9c310b9a0fa345957b1a0df102036ea6a6d3825d787eb7d3b3131e70" + }, + { + "keyid": "b59ade3245077bd622dc7bf41163a877e05272590cb4830632dc0d034717d735", + "sig": "3066023100fa26ef91f1bb3cdf779cb6bbc43d70bab67a7c66103e61b8998698f469fad0d44002d4a9399ceac304b8ee1a8823fd99023100ecd415e58696ab4778f4bdf5187be3743ac372b29cd139111b3461a0da42f8f44e5bb3ec83c2bd0ce4b6281e585b8889" + }, + { + "keyid": "a07e905cad57b71374ef5e408d61936c31957b35026de0b8db3938878ccad637", + "sig": "3066023100f4802957c21a0916677154494c4360260f5994c35c435d2bf2df39bc7cccca7fb437563d21ae128bcaa7909ead7d6e7802310097513f90e5e7dbe4bcb3f9b20308e966ec38960e8cad4869a4b32be8bd98726ded4a68c671d5f22858dd10ba3b56b04a" + }, + { + "keyid": "a442c20904f96e3a367e16037665bfb2e002bb2e9586cec4c96d83697a49fa2a", + "sig": "30660231008d58d822ee1accd6bff07e79f171d61d122d35c1d51c86b2f2ada76cff695090fdf859127889f9a8d90e539277b5ab5a023100f0ad8d7ba6a25e27316a91bbc61c9b4d31f42c5c93662ea53d660af6b8ab9ee111b135b6844901ccd281fd9246d5f786" + }, + { + "keyid": "8969905969a712d54c9b327939aead62784587b54d1d03cbaa835f79205069bf", + "sig": "3066023100894ecac9291e64ea6b84d168b886ef5829f4ad5b57c83b0ec745b644e4d19983e29c4681b5f744070a679e71fb4325af023100b6365d50a44e59932ea24d17a9fbc975cc7e7b44539d36dc7208470b1ddc9572b06266e149d1793a12a98cd62b803a95" + } + ], + "signed": { + "_type": "root", + "consistent_snapshot": true, + "expires": "2027-04-21T15:31:05Z", + "keys": { + "1286a08794005a5f1d679e56322f45fd3b55aa198f87bdc699f8213048602000": { + "keytype": "ecdsa", + "keyval": { + "public": 
"-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEruzzCikai9w8LqTLE4cxf0qRIFU6AQve\nnMmudDdNo22MCiOwbuYjJJ1dvRlMiSVrAGyv1+37h8aXGa5Qbx5nb4TEIRfaDth8\nhMbKJcQ7OOK/6SaltjNZh3VaZ396/WIC\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@nouemankhal" + }, + "2d019dcc7a3e8da4d22bf364f0e0cb87937b2ced68339f3c53d305d1a9aadcce": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAErfozI3wqaB8k6o6Mc7SPFiw8s1dLTaxk\nMmhMsdkk7QIl3t+gFzWNdXANEjN027g4S6Ty2CvdzovU37yD24td9pQBh8LGmfPa\nmU5cxtzRaXkCibibJrrvLxyyZTWZXW6C\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@alexeypilyugin" + }, + "4542ee95093cb434e0d80a4bb9dd9d96e6b67cda12759fa2648a7786f822e97d": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEUV4g/gyxCdXKHK07QWO5z6S9lRhL88DO\nOb22g0dCOtxBB2sKojAUw3wXXz+SaUZRFgqfVvezbtsC4LSkkIlwA5MrJDA83kP2\nJRo4BQPtW8wZmtSvkkRQPSfAdXv975pg\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-online-uri": "awskms:arn:aws:kms:us-east-1:510233252802:key/9efe9e34-88f3-4ad3-8828-5340561e7c42" + }, + "65ccb05ff16285a3b65ea2db2581ed083bb19acfcbd130d5484c151baf28541f": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEi44mg+tnJn41Cy4Lr42lQNRuZaHDY4d+\nB/oYkBRTiHl6n6hc6alGLS/1rWijAfSL7x7wgVeOrA5fp1ornW27vPOkRVWJO5Lv\nZcZXwJYi7svVFBkFjBAtAOF6DGuAEWc9\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@lucia-sb" + }, + "6f0f52eb4cb14d590aafd5f7eb8d9a79477ac89794be2d3caade9fc39b3735e6": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEAbWHH4rfNiJFz9gXLPV/QJK0tky4/nW1\nyMPnUe1GRac6UfGcjZvGA7mpmns4FYG1KuHbPhWlEDOQnLjiIiJkY2+Z96tywq6y\n+/e+0Gc2KSsVr0IAWALkTzQE+Q6ru+lj\n-----END PUBLIC 
KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@nubtron" + }, + "8969905969a712d54c9b327939aead62784587b54d1d03cbaa835f79205069bf": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEkztjp5ixZrKt94qnSn4bisyEdgs0Wre/\nheazr1zx7MJUCLiHim0lEDWCB64m/YLru+W3/PLwTiQSavO62lB6y3ggjcq/ygwA\n5yxi0bP/MAJBZ0Hl+y+Q8BfKTZSrTb6j\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@hadhemidd" + }, + "a07e905cad57b71374ef5e408d61936c31957b35026de0b8db3938878ccad637": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEEilQwnno5GxJpoyxulKzkkHa0x0/ERDa\nf3m1ZCpF9SoT2B98T+BwT6noD+qlOwX7VKLFSQwl4/od53tu6Wt3s3P70zFviq+Y\n+chUOSCbA5y/TCvfwx4mLBruXI1QbVOh\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@aarakke" + }, + "a442c20904f96e3a367e16037665bfb2e002bb2e9586cec4c96d83697a49fa2a": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEUJ7k4tiIZrWNLhrNrcBBMh4we3GiMlpo\ntwVy72lNw7aMxisK6ttP0mV30Yh1rX37DO6UUdeiWImrYBVfXFkP7z2QD9qKetny\nCeVHycA7uNby7yb7pljv2l2SpTgXACZk\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@iliakur" + }, + "ac5d650bc9aa17fdad54753fbf64e083f6f613286d0feef991ff61ec26874f2b": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEAWOvhm6nk7iY+EYK8ZnrxS49yqLf/ZTR\nJ74WY9Kz3ikjyXASkD4IgqJyyrmbMoqS9k6/RM/Zk6CAfPeZneDh1puVAlxy9nJD\nZp/OW78dVOqrlw1uQ0d+gfe7b4TcUNG4\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@dkirov-dd" + }, + "b59ade3245077bd622dc7bf41163a877e05272590cb4830632dc0d034717d735": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC 
KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEd/9wooA4OKbC7hUO1OTZN3pnFbc85PDs\n+izKkDDSqj3yk8Pa39OJstT2BHvrn/B0BKMHhE6T/PN/rhorKVIVZ3UZErn1QCgG\nkkcFfA5MQm92SjIr9zAJea9bVUJhZ+PA\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@sarah-witt" + }, + "e942404daa3e8cb1143ab5f275df2f8c741ae002194147806bd6f05b8e2e816f": { + "keytype": "ecdsa", + "keyval": { + "public": "-----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAE3VG/DJn/wmXh3bQ/LLjGMyKubQ1f5/1P\nJTVDYgTh5AC5zWxDSD26PoNpS29MecItPoM+pMy5YC99mwkEkxjNdwIke1Aons92\n8SVtL3BYH311oC6jLtFt+oqEunL5EdgJ\n-----END PUBLIC KEY-----\n" + }, + "scheme": "ecdsa-sha2-nistp384", + "x-tuf-on-ci-keyowner": "@kyle-neale" + } + }, + "roles": { + "root": { + "keyids": [ + "ac5d650bc9aa17fdad54753fbf64e083f6f613286d0feef991ff61ec26874f2b", + "6f0f52eb4cb14d590aafd5f7eb8d9a79477ac89794be2d3caade9fc39b3735e6", + "2d019dcc7a3e8da4d22bf364f0e0cb87937b2ced68339f3c53d305d1a9aadcce", + "e942404daa3e8cb1143ab5f275df2f8c741ae002194147806bd6f05b8e2e816f", + "1286a08794005a5f1d679e56322f45fd3b55aa198f87bdc699f8213048602000", + "65ccb05ff16285a3b65ea2db2581ed083bb19acfcbd130d5484c151baf28541f", + "b59ade3245077bd622dc7bf41163a877e05272590cb4830632dc0d034717d735", + "a07e905cad57b71374ef5e408d61936c31957b35026de0b8db3938878ccad637", + "a442c20904f96e3a367e16037665bfb2e002bb2e9586cec4c96d83697a49fa2a", + "8969905969a712d54c9b327939aead62784587b54d1d03cbaa835f79205069bf" + ], + "threshold": 2 + }, + "snapshot": { + "keyids": [ + "4542ee95093cb434e0d80a4bb9dd9d96e6b67cda12759fa2648a7786f822e97d" + ], + "threshold": 1, + "x-tuf-on-ci-expiry-period": 365, + "x-tuf-on-ci-signing-period": 60 + }, + "targets": { + "keyids": [ + "ac5d650bc9aa17fdad54753fbf64e083f6f613286d0feef991ff61ec26874f2b", + "6f0f52eb4cb14d590aafd5f7eb8d9a79477ac89794be2d3caade9fc39b3735e6", + "2d019dcc7a3e8da4d22bf364f0e0cb87937b2ced68339f3c53d305d1a9aadcce", + "e942404daa3e8cb1143ab5f275df2f8c741ae002194147806bd6f05b8e2e816f", + 
"1286a08794005a5f1d679e56322f45fd3b55aa198f87bdc699f8213048602000", + "65ccb05ff16285a3b65ea2db2581ed083bb19acfcbd130d5484c151baf28541f", + "b59ade3245077bd622dc7bf41163a877e05272590cb4830632dc0d034717d735", + "a07e905cad57b71374ef5e408d61936c31957b35026de0b8db3938878ccad637", + "a442c20904f96e3a367e16037665bfb2e002bb2e9586cec4c96d83697a49fa2a", + "8969905969a712d54c9b327939aead62784587b54d1d03cbaa835f79205069bf" + ], + "threshold": 1 + }, + "timestamp": { + "keyids": [ + "4542ee95093cb434e0d80a4bb9dd9d96e6b67cda12759fa2648a7786f822e97d" + ], + "threshold": 1, + "x-tuf-on-ci-expiry-period-hours": 48, + "x-tuf-on-ci-signing-period-hours": 24 + } + }, + "spec_version": "1.0.31", + "version": 1, + "x-tuf-on-ci-expiry-period": 365, + "x-tuf-on-ci-signing-period": 60 + } +} \ No newline at end of file diff --git a/datadog_checks_downloader/datadog_checks/downloader/download_v2.py b/datadog_checks_downloader/datadog_checks/downloader/download_v2.py new file mode 100644 index 0000000000000..82557de31f4c9 --- /dev/null +++ b/datadog_checks_downloader/datadog_checks/downloader/download_v2.py @@ -0,0 +1,140 @@ +# (C) Datadog, Inc. 2024-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +"""TUF pointer-file downloader for the v2 repository format.""" + +from __future__ import annotations + +import hashlib +import importlib.resources +import json +import logging +import tempfile +import urllib.request +from pathlib import Path + +from tuf.ngclient import Updater +from tuf.ngclient.config import UpdaterConfig + +from .exceptions import ( + DigestMismatch, + LengthMismatch, + MalformedPointerError, + MissingVersion, + TargetNotFoundError, +) + +logger = logging.getLogger(__name__) + +V2_REPOSITORY_URL = "https://agent-integration-wheels-prod.s3.amazonaws.com" + +# tuf.ngclient sets its own fetcher timeout; this applies only to the raw wheel urlopen(). 
+WHEEL_FETCH_TIMEOUT_SECONDS = 60 + +REQUIRED_POINTER_KEYS = ('digest', 'length', 'wheel_path') + + +class TUFPointerDownloader: + """Downloads Datadog integration wheels from a v2 TUF repository.""" + + def __init__(self, repository_url: str, disable_verification: bool = False): + self._repository_url = repository_url.rstrip('/') + self._disable_verification = disable_verification + + if disable_verification: + logger.warning('Running with TUF verification disabled. Integrity is protected only by TLS (HTTPS).') + + def _bootstrap_metadata_dir(self, metadata_dir: Path) -> None: + dest = metadata_dir / 'root.json' + metadata = importlib.resources.files('datadog_checks.downloader') / 'data' / 'v2' / 'metadata' + dest.write_bytes((metadata / 'root.json').read_bytes()) + + def _make_updater(self, metadata_dir: Path, target_dir: Path) -> Updater: + return Updater( + metadata_dir=str(metadata_dir), + metadata_base_url=f'{self._repository_url}/metadata/', + target_base_url=f'{self._repository_url}/targets/', + target_dir=str(target_dir), + config=UpdaterConfig(prefix_targets_with_hash=True), + ) + + @staticmethod + def _target_path(project: str, version: str | None) -> str: + name = version if version is not None else 'latest' + return f'{project}/{name}.json' + + @staticmethod + def _wheel_filename(project: str, version: str) -> str: + distribution = project.replace('-', '_') + return f'{distribution}-{version}-py3-none-any.whl' + + def _direct_wheel_url(self, project: str, version: str) -> str: + return f'{self._repository_url}/wheels/{project}/{self._wheel_filename(project, version)}' + + @staticmethod + def _validate_pointer(project: str, pointer: dict) -> None: + for key in REQUIRED_POINTER_KEYS: + if key not in pointer: + raise MalformedPointerError(project, key) + if not pointer['wheel_path'].startswith('/'): + raise MalformedPointerError(project, 'wheel_path') + + @staticmethod + def _verify_content(project: str, content: bytes, pointer: dict) -> None: + if 
len(content) != pointer['length']: + raise LengthMismatch(project, pointer['length'], len(content)) + actual_digest = hashlib.sha256(content).hexdigest() + if actual_digest != pointer['digest']: + raise DigestMismatch(project, pointer['digest'], actual_digest) + + def get_pointer(self, project: str, version: str | None = None) -> dict: + """Return the pointer JSON for *project* at *version* (or 'latest' when None).""" + with tempfile.TemporaryDirectory() as tmp: + metadata_dir = Path(tmp) / 'metadata' + target_dir = Path(tmp) / 'targets' + metadata_dir.mkdir() + target_dir.mkdir() + + target_path = self._target_path(project, version) + self._bootstrap_metadata_dir(metadata_dir) + updater = self._make_updater(metadata_dir, target_dir) + updater.refresh() + + target_info = updater.get_targetinfo(target_path) + if target_info is None: + label = version if version is not None else 'latest stable' + raise TargetNotFoundError(f'No TUF target for {project!r} version {label!r}') + + pointer_path = target_dir / target_path + pointer_path.parent.mkdir(parents=True, exist_ok=True) + updater.download_target(target_info, pointer_path) + + return json.loads(pointer_path.read_text(encoding='utf-8')) + + def download(self, project: str, version: str | None = None, dest_dir: Path | None = None) -> Path: + """Download and verify the wheel for *project* at *version*; return its path.""" + if self._disable_verification: + if version is None: + raise MissingVersion('unsafe-disable-verification requires an explicit --version') + wheel_url = self._direct_wheel_url(project, version) + wheel_filename = self._wheel_filename(project, version) + pointer: dict | None = None + else: + pointer = self.get_pointer(project, version) + self._validate_pointer(project, pointer) + wheel_url = self._repository_url + pointer['wheel_path'] + wheel_filename = Path(pointer['wheel_path']).name + + dest = (dest_dir or Path(tempfile.mkdtemp())) / wheel_filename + + logger.info('Downloading wheel from %s', 
wheel_url) + with urllib.request.urlopen(wheel_url, timeout=WHEEL_FETCH_TIMEOUT_SECONDS) as resp: + content = resp.read() + + if pointer is not None: + self._verify_content(project, content, pointer) + + dest.write_bytes(content) + logger.info('Wrote %s to %s', wheel_filename, dest) + return dest diff --git a/datadog_checks_downloader/datadog_checks/downloader/exceptions.py b/datadog_checks_downloader/datadog_checks/downloader/exceptions.py index bb6b75e05a156..db8764040a700 100644 --- a/datadog_checks_downloader/datadog_checks/downloader/exceptions.py +++ b/datadog_checks_downloader/datadog_checks/downloader/exceptions.py @@ -30,6 +30,10 @@ def __str__(self): return '{}'.format(self.standard_distribution_name) +class MissingVersion(CLIError): + """Raised when --version is required but absent (e.g. with --unsafe-disable-verification).""" + + # Exceptions for the download module. @@ -37,6 +41,41 @@ class TargetNotFoundError(ChecksDownloaderException): """An exception raised when a target is not found.""" +class MalformedPointerError(ChecksDownloaderException): + """Raised when a TUF-signed pointer JSON is invalid or missing fields.""" + + def __init__(self, project: str, field: str): + self.project = project + self.field = field + + def __str__(self) -> str: + return f'{self.project}: pointer field {self.field!r} is missing or malformed' + + +class DigestMismatch(ChecksDownloaderException): + """Raised when the downloaded wheel's sha256 does not match the pointer.""" + + def __init__(self, project: str, expected: str, actual: str): + self.project = project + self.expected = expected + self.actual = actual + + def __str__(self) -> str: + return f'{self.project}: expected digest {self.expected}, got {self.actual}' + + +class LengthMismatch(ChecksDownloaderException): + """Raised when the downloaded wheel's byte length does not match the pointer.""" + + def __init__(self, project: str, expected: int, actual: int): + self.project = project + self.expected = expected + 
self.actual = actual + + def __str__(self) -> str: + return f'{self.project}: expected length {self.expected}, got {self.actual}' + + class IncorrectRootLayoutType(ChecksDownloaderException): def __init__(self, found, expected): self.found = found diff --git a/datadog_checks_downloader/pyproject.toml b/datadog_checks_downloader/pyproject.toml index 56ecc4d80baee..b40dcb4e75d39 100644 --- a/datadog_checks_downloader/pyproject.toml +++ b/datadog_checks_downloader/pyproject.toml @@ -55,6 +55,9 @@ include = [ include = [ "/datadog_checks/downloader", ] +artifacts = [ + "/datadog_checks/downloader/data/v2/metadata/root.json", +] dev-mode-dirs = [ ".", ] diff --git a/datadog_checks_downloader/tests/test_unit.py b/datadog_checks_downloader/tests/test_unit.py index 9170abf3ee09a..160b7c584aeef 100644 --- a/datadog_checks_downloader/tests/test_unit.py +++ b/datadog_checks_downloader/tests/test_unit.py @@ -1,7 +1,28 @@ # (C) Datadog, Inc. 2023-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import urllib.error + +import pytest +from tuf.api.exceptions import DownloadError + +from datadog_checks.downloader.cli import _v2_failure_category from datadog_checks.downloader.download import TUFDownloader +from datadog_checks.downloader.exceptions import TargetNotFoundError + + +@pytest.mark.parametrize( + 'exc,expected', + [ + pytest.param(TargetNotFoundError('missing'), 'target version not found', id='target-not-found'), + pytest.param(urllib.error.URLError('timeout'), 'network error', id='network-urlerror'), + pytest.param(DownloadError('boom'), 'network error', id='network-downloaderror'), + pytest.param(TimeoutError('slow'), 'network error', id='network-timeout'), + pytest.param(ValueError('bad pointer'), 'other', id='other'), + ], +) +def test_v2_failure_category(exc, expected): + assert _v2_failure_category(exc) == expected def test_non_official_wheel_filter(mocker): diff --git a/datadog_checks_downloader/tests/test_v2_downloader.py 
b/datadog_checks_downloader/tests/test_v2_downloader.py new file mode 100644 index 0000000000000..db1977db2be0d --- /dev/null +++ b/datadog_checks_downloader/tests/test_v2_downloader.py @@ -0,0 +1,319 @@ +# (C) Datadog, Inc. 2024-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +"""Unit tests for TUFPointerDownloader (v2 repository format) and the v2 CLI surface.""" + +import hashlib +import json +import urllib.error +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from tuf.api.exceptions import DownloadError + +from datadog_checks.downloader import cli +from datadog_checks.downloader.download_v2 import TUFPointerDownloader +from datadog_checks.downloader.exceptions import ( + DigestMismatch, + LengthMismatch, + MalformedPointerError, + MissingVersion, + NonCanonicalVersion, + NonDatadogPackage, + TargetNotFoundError, +) + +pytestmark = pytest.mark.offline + +PROJECT = 'datadog-postgres' +VERSION = '14.0.0' +WHEEL_NAME = f'datadog_postgres-{VERSION}-py3-none-any.whl' +WHEEL_CONTENT = b'fake wheel bytes for testing' +WHEEL_DIGEST = hashlib.sha256(WHEEL_CONTENT).hexdigest() +WHEEL_LENGTH = len(WHEEL_CONTENT) +REPO_URL = 'https://agent-integration-wheels-staging.s3.amazonaws.com' + +POINTER = { + 'digest': WHEEL_DIGEST, + 'length': WHEEL_LENGTH, + 'version': VERSION, + 'repository': REPO_URL, + 'wheel_path': f'/wheels/{PROJECT}/{WHEEL_NAME}', + 'attestation_path': f'/attestations/{PROJECT}/{VERSION}.sigstore.json', +} + + +def _mock_tuf_updater(pointer: dict) -> MagicMock: + pointer_bytes = json.dumps(pointer).encode() + mock_updater = MagicMock() + mock_updater.get_targetinfo.return_value = MagicMock() + + def fake_download_target(_target_info, dest_path): + Path(dest_path).parent.mkdir(parents=True, exist_ok=True) + Path(dest_path).write_bytes(pointer_bytes) + + mock_updater.download_target.side_effect = fake_download_target + return mock_updater + + +def _mock_response(content: 
bytes) -> MagicMock: + response = MagicMock() + response.__enter__ = lambda s: s + response.__exit__ = MagicMock(return_value=False) + response.read.return_value = content + return response + + +@pytest.fixture +def mock_urlopen(): + with patch('datadog_checks.downloader.download_v2.urllib.request.urlopen') as mock: + mock.return_value = _mock_response(WHEEL_CONTENT) + yield mock + + +@pytest.fixture +def mock_updater_cls(): + with patch('datadog_checks.downloader.download_v2.Updater') as mock: + mock.return_value = _mock_tuf_updater(POINTER) + yield mock + + +class TestTargetResolution: + @pytest.mark.parametrize( + 'version,expected_target', + [ + pytest.param(VERSION, f'{PROJECT}/{VERSION}.json', id='explicit-version'), + pytest.param(None, f'{PROJECT}/latest.json', id='missing-version'), + ], + ) + def test_get_pointer_requests_expected_target(self, mock_urlopen, mock_updater_cls, version, expected_target): + downloader = TUFPointerDownloader(repository_url=REPO_URL) + downloader.get_pointer(PROJECT, version=version) + + mock_updater = mock_updater_cls.return_value + assert mock_updater.get_targetinfo.call_args[0][0] == expected_target + + +class TestHappyPath: + def test_download_returns_wheel_path(self, mock_urlopen, mock_updater_cls, tmp_path): + downloader = TUFPointerDownloader(repository_url=REPO_URL) + wheel_path = downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + + assert wheel_path.exists() + assert wheel_path.read_bytes() == WHEEL_CONTENT + assert wheel_path.name == WHEEL_NAME + + def test_repository_flag_overrides_pointer_repository(self, mock_urlopen, mock_updater_cls, tmp_path): + prod_pointer = {**POINTER, 'repository': 'https://agent-integration-wheels-prod.s3.amazonaws.com'} + mock_updater_cls.return_value = _mock_tuf_updater(prod_pointer) + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + + mock_urlopen.assert_called_once_with( + 
f'{REPO_URL}/wheels/{PROJECT}/{WHEEL_NAME}', + timeout=60, + ) + + +class TestTargetNotFound: + def test_raises_when_tuf_target_absent(self, mock_urlopen, mock_updater_cls): + mock_updater = MagicMock() + mock_updater.get_targetinfo.return_value = None + mock_updater_cls.return_value = mock_updater + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + with pytest.raises(TargetNotFoundError, match=PROJECT): + downloader.get_pointer(PROJECT, version='99.99.99') + + +class TestDigestMismatch: + def test_raises_on_corrupted_wheel(self, mock_urlopen, mock_updater_cls, tmp_path): + tampered = b'tampered bytes that match the pointer length'[:WHEEL_LENGTH] + mock_urlopen.return_value = _mock_response(tampered) + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + with pytest.raises(DigestMismatch, match=PROJECT): + downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + assert not (tmp_path / WHEEL_NAME).exists() + + +class TestLengthMismatch: + def test_raises_when_pointer_length_does_not_match_wheel(self, mock_urlopen, mock_updater_cls, tmp_path): + bad_pointer = {**POINTER, 'length': WHEEL_LENGTH + 1} + mock_updater_cls.return_value = _mock_tuf_updater(bad_pointer) + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + with pytest.raises(LengthMismatch) as exc_info: + downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + assert exc_info.value.expected == WHEEL_LENGTH + 1 + assert exc_info.value.actual == WHEEL_LENGTH + assert not (tmp_path / WHEEL_NAME).exists() + + +class TestMalformedPointer: + @pytest.mark.parametrize('missing_key', ['digest', 'length', 'wheel_path']) + def test_raises_when_required_key_missing(self, mock_urlopen, mock_updater_cls, tmp_path, missing_key): + broken_pointer = {k: v for k, v in POINTER.items() if k != missing_key} + mock_updater_cls.return_value = _mock_tuf_updater(broken_pointer) + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + with 
pytest.raises(MalformedPointerError, match=missing_key): + downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + + def test_raises_when_wheel_path_missing_leading_slash(self, mock_urlopen, mock_updater_cls, tmp_path): + no_slash_pointer = {**POINTER, 'wheel_path': f'wheels/{PROJECT}/{WHEEL_NAME}'} + mock_updater_cls.return_value = _mock_tuf_updater(no_slash_pointer) + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + with pytest.raises(MalformedPointerError, match='wheel_path'): + downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + mock_urlopen.assert_not_called() + + +class TestNetworkErrorMidDownload: + def test_http_error_propagates(self, mock_urlopen, mock_updater_cls, tmp_path): + mock_urlopen.side_effect = urllib.error.HTTPError( + url='http://example/x.whl', code=500, msg='boom', hdrs=None, fp=None + ) + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + with pytest.raises(urllib.error.HTTPError): + downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + + def test_url_error_propagates(self, mock_urlopen, mock_updater_cls, tmp_path): + mock_urlopen.side_effect = urllib.error.URLError('unreachable') + + downloader = TUFPointerDownloader(repository_url=REPO_URL) + with pytest.raises(urllib.error.URLError): + downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + + +class TestDisableVerification: + def test_directly_downloads_wheel_without_tuf_or_digest_checks(self, mock_urlopen, mock_updater_cls, tmp_path): + content = b'bytes not matching any signed pointer' + mock_urlopen.return_value = _mock_response(content) + + downloader = TUFPointerDownloader(repository_url=REPO_URL, disable_verification=True) + wheel_path = downloader.download(PROJECT, version=VERSION, dest_dir=tmp_path) + + mock_urlopen.assert_called_once_with( + f'{REPO_URL}/wheels/{PROJECT}/{WHEEL_NAME}', + timeout=60, + ) + assert wheel_path.name == WHEEL_NAME + assert wheel_path.read_bytes() == content + 
mock_updater_cls.assert_not_called() + + def test_direct_download_requires_explicit_version(self, tmp_path): + downloader = TUFPointerDownloader(repository_url=REPO_URL, disable_verification=True) + with pytest.raises(MissingVersion, match='requires an explicit --version'): + downloader.download(PROJECT, dest_dir=tmp_path) + + +class TestInstantiateV2Downloader: + def test_rejects_non_datadog_package(self, monkeypatch): + monkeypatch.setattr('sys.argv', ['downloader', 'requests']) + with pytest.raises(NonDatadogPackage, match='requests'): + cli.instantiate_v2_downloader() + + def test_rejects_non_canonical_version(self, monkeypatch): + monkeypatch.setattr('sys.argv', ['downloader', 'datadog-postgres', '--version', 'banana']) + with pytest.raises(NonCanonicalVersion, match='banana'): + cli.instantiate_v2_downloader() + + def test_does_not_warn_when_v1_compat_flags_are_parsed(self, monkeypatch, capsys): + monkeypatch.setattr('sys.argv', ['downloader', 'datadog-postgres', '--type', 'core', '--ignore-python-version']) + cli.instantiate_v2_downloader() + assert capsys.readouterr().err == '' + + def test_warns_for_v1_compat_flags_in_strict_v2_mode(self, monkeypatch, capsys): + monkeypatch.setattr( + 'sys.argv', ['downloader', 'datadog-postgres', '--v2', '--type', 'core', '--ignore-python-version'] + ) + _, _, _, args = cli.instantiate_v2_downloader() + cli.warn_v2_ignored_args(args) + stderr = capsys.readouterr().err + assert 'WARNING: --type' in stderr + assert 'NOTE: --ignore-python-version' in stderr + + def test_force_flag_is_silently_ignored(self, monkeypatch, capsys): + monkeypatch.setattr('sys.argv', ['downloader', 'datadog-postgres', '--force']) + cli.instantiate_v2_downloader() + assert capsys.readouterr().err == '' + + +class TestCliDownloadFallback: + """Covers the cli.download() v2-attempt-then-v1-fallback orchestration.""" + + def test_strict_v2_raises_on_v2_failure(self, monkeypatch): + monkeypatch.setattr('sys.argv', ['downloader', 'datadog-postgres', 
'--v2']) + monkeypatch.setattr(cli, 'run_v2_downloader', MagicMock(side_effect=TargetNotFoundError('missing'))) + v1 = MagicMock() + monkeypatch.setattr(cli, 'run_downloader', v1) + monkeypatch.setattr(cli, 'instantiate_downloader', MagicMock(return_value=(None, None, None, None))) + + with pytest.raises(TargetNotFoundError): + cli.download() + v1.assert_not_called() + + @pytest.mark.parametrize( + 'fallback_exc', + [ + pytest.param(MissingVersion('missing'), id='missing-version'), + pytest.param(TargetNotFoundError('missing'), id='target-not-found'), + pytest.param(DownloadError('unreachable'), id='download-error'), + pytest.param(TimeoutError('slow'), id='timeout-error'), + pytest.param(urllib.error.URLError('unreachable'), id='url-error'), + ], + ) + def test_default_falls_back_to_v1_on_expected_v2_failures(self, monkeypatch, fallback_exc): + monkeypatch.setattr('sys.argv', ['downloader', 'datadog-postgres']) + monkeypatch.setattr(cli, 'run_v2_downloader', MagicMock(side_effect=fallback_exc)) + v1 = MagicMock() + monkeypatch.setattr(cli, 'run_downloader', v1) + monkeypatch.setattr(cli, 'instantiate_downloader', MagicMock(return_value=('d', 'n', 'v', False))) + + cli.download() + v1.assert_called_once_with('d', 'n', 'v', False) + + def test_default_unsafe_disable_verification_without_version_falls_back_to_v1(self, monkeypatch): + monkeypatch.setattr('sys.argv', ['downloader', 'datadog-postgres', '--unsafe-disable-verification']) + monkeypatch.setattr(cli, 'run_v2_downloader', MagicMock(side_effect=MissingVersion('missing'))) + v1 = MagicMock() + monkeypatch.setattr(cli, 'run_downloader', v1) + monkeypatch.setattr(cli, 'instantiate_downloader', MagicMock(return_value=('d', 'n', None, False))) + + cli.download() + v1.assert_called_once_with('d', 'n', None, False) + + def test_non_datadog_package_does_not_fall_back_to_v1(self, monkeypatch): + monkeypatch.setattr('sys.argv', ['downloader', 'requests']) + v1 = MagicMock() + monkeypatch.setattr(cli, 'run_downloader', 
v1) + monkeypatch.setattr(cli, 'instantiate_downloader', MagicMock()) + + with pytest.raises(NonDatadogPackage): + cli.download() + v1.assert_not_called() + + @pytest.mark.parametrize( + 'integrity_exc', + [ + pytest.param(DigestMismatch(PROJECT, 'a', 'b'), id='digest-mismatch'), + pytest.param(LengthMismatch(PROJECT, 1, 2), id='length-mismatch'), + pytest.param(MalformedPointerError(PROJECT, 'digest'), id='malformed-pointer'), + ], + ) + def test_integrity_errors_do_not_fall_back_to_v1(self, monkeypatch, integrity_exc): + monkeypatch.setattr('sys.argv', ['downloader', 'datadog-postgres']) + monkeypatch.setattr(cli, 'run_v2_downloader', MagicMock(side_effect=integrity_exc)) + v1 = MagicMock() + monkeypatch.setattr(cli, 'run_downloader', v1) + monkeypatch.setattr(cli, 'instantiate_downloader', MagicMock()) + + with pytest.raises(type(integrity_exc)): + cli.download() + v1.assert_not_called() diff --git a/ddev/changelog.d/23828.added b/ddev/changelog.d/23828.added new file mode 100644 index 0000000000000..1a808e1d73e89 --- /dev/null +++ b/ddev/changelog.d/23828.added @@ -0,0 +1 @@ +Print the exact workflow run URL when dispatching `ddev dep promote`, via a new `return_run_details` option on `GitHubManager.dispatch_workflow`. 
diff --git a/ddev/src/ddev/cli/dep/promote.py b/ddev/src/ddev/cli/dep/promote.py index d298e7bbe1c23..c7f3210de3671 100644 --- a/ddev/src/ddev/cli/dep/promote.py +++ b/ddev/src/ddev/cli/dep/promote.py @@ -3,6 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from __future__ import annotations +import logging import re from typing import TYPE_CHECKING @@ -39,20 +40,26 @@ def promote(app: Application, pr_url: str): pr_number = int(match.group(1)) - with app.status(f'Fetching PR #{pr_number} head...'): - head_sha, head_ref = app.github.get_pr_head(pr_number) + httpx_logger = logging.getLogger('httpx') + previous_level = httpx_logger.level + httpx_logger.setLevel(logging.WARNING) + try: + with app.status(f'Fetching PR #{pr_number} head...'): + head_sha, head_ref = app.github.get_pr_head(pr_number) - app.display_info(f'PR #{pr_number} — branch: {head_ref}, SHA: {head_sha}') + app.display_info(f'PR #{pr_number}: branch {head_ref}, SHA {head_sha}') - with app.status('Dispatching promote workflow...'): - app.github.dispatch_workflow( - workflow_id=PROMOTE_WORKFLOW, - ref=PROMOTE_WORKFLOW_REF, - inputs={'pr_number': str(pr_number), 'head_sha': head_sha}, - ) + with app.status('Dispatching promote workflow...'): + run_details = app.github.dispatch_workflow( + workflow_id=PROMOTE_WORKFLOW, + ref=PROMOTE_WORKFLOW_REF, + inputs={'pr_number': str(pr_number), 'head_sha': head_sha}, + return_run_details=True, + ) - runs_url = ( - f'https://github.com/{app.github.repo_id}/actions/workflows/{PROMOTE_WORKFLOW}?query=event%3Aworkflow_dispatch' - ) - app.display_success(f'Promote workflow dispatched for PR #{pr_number}.') - app.display_info(f'Recent runs: {runs_url}') + if not run_details: + app.abort('Workflow dispatched but no run details were returned.') + app.display_success(f'Promote workflow dispatched for PR #{pr_number}.') + app.display_info(f'Workflow run: {run_details["html_url"]}') + finally: + httpx_logger.setLevel(previous_level) diff --git
a/ddev/src/ddev/utils/github.py b/ddev/src/ddev/utils/github.py index ef314fb7d9fe7..bae40dc9ff23c 100644 --- a/ddev/src/ddev/utils/github.py +++ b/ddev/src/ddev/utils/github.py @@ -6,9 +6,11 @@ import json from functools import cached_property from time import time -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, overload if TYPE_CHECKING: + from typing import Any, Literal + from httpx import Client from ddev.cli.terminal import BorrowedStatus @@ -217,12 +219,48 @@ def get_pull_request_labels(self, pr_number: int) -> list[str] | None: return None return [label['name'] for label in response.json().get('labels', [])] - def dispatch_workflow(self, workflow_id: str, ref: str, inputs: dict[str, Any]) -> None: - """Trigger a workflow_dispatch event.""" - self.__api_post( + @overload + def dispatch_workflow( + self, + workflow_id: str, + ref: str, + inputs: dict[str, Any], + return_run_details: Literal[False] = False, + ) -> None: ... + + @overload + def dispatch_workflow( + self, + workflow_id: str, + ref: str, + inputs: dict[str, Any], + return_run_details: Literal[True], + ) -> dict[str, Any]: ... + + def dispatch_workflow( + self, + workflow_id: str, + ref: str, + inputs: dict[str, Any], + return_run_details: bool = False, + ) -> dict[str, Any] | None: + """Trigger a workflow_dispatch event. + + When ``return_run_details`` is true, request the new run's details from + the API and return the parsed JSON response (``workflow_run_id``, + ``run_url``, ``html_url``). The default keeps the prior fire-and-forget + behavior and returns ``None``. 
+ """ + payload: dict[str, Any] = {'ref': ref, 'inputs': inputs} + if return_run_details: + payload['return_run_details'] = True + response = self.__api_post( self.WORKFLOW_DISPATCH_API.format(repo_id=self.repo_id, workflow_id=workflow_id), - content=json.dumps({'ref': ref, 'inputs': inputs}), + content=json.dumps(payload), ) + if not return_run_details: + return None + return response.json() def get_pull_request_comments(self, pr_number: int) -> list[dict]: response = self.__api_get( diff --git a/ddev/tests/cli/dep/conftest.py b/ddev/tests/cli/dep/conftest.py new file mode 100644 index 0000000000000..8758a8cbc804c --- /dev/null +++ b/ddev/tests/cli/dep/conftest.py @@ -0,0 +1,32 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +from __future__ import annotations + +import logging +from collections.abc import Generator +from typing import TYPE_CHECKING + +import pytest + +if TYPE_CHECKING: + from ddev.config.file import ConfigFileWithOverrides + + +@pytest.fixture(autouse=True) +def configure_github_credentials(config_file: ConfigFileWithOverrides) -> None: + """Provide github credentials so commands that touch app.github do not abort.""" + config_file.model.github = {'user': 'test-user', 'token': 'test-token'} + config_file.save() + + +@pytest.fixture +def httpx_at_debug() -> Generator[logging.Logger, None, None]: + """Force the httpx logger to DEBUG and restore its previous level on teardown.""" + logger = logging.getLogger('httpx') + previous_level = logger.level + logger.setLevel(logging.DEBUG) + try: + yield logger + finally: + logger.setLevel(previous_level) diff --git a/ddev/tests/cli/dep/test_promote.py b/ddev/tests/cli/dep/test_promote.py new file mode 100644 index 0000000000000..2d55db0660c63 --- /dev/null +++ b/ddev/tests/cli/dep/test_promote.py @@ -0,0 +1,79 @@ +# (C) Datadog, Inc. 
2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import logging + +import pytest + +RUN_DETAILS = { + 'workflow_run_id': 999, + 'run_url': 'https://api.github.com/repos/DataDog/integrations-core/actions/runs/999', + 'html_url': 'https://github.com/DataDog/integrations-core/actions/runs/999', +} + + +def test_promote_dispatches_workflow_and_prints_run_url(ddev, mocker): + mocker.patch('ddev.utils.github.GitHubManager.get_pr_head', return_value=('deadbeef', 'feature-branch')) + dispatch = mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow', return_value=RUN_DETAILS) + + result = ddev('dep', 'promote', 'https://github.com/DataDog/integrations-core/pull/12345') + + assert result.exit_code == 0, result.output + dispatch.assert_called_once_with( + workflow_id='dependency-wheel-promotion.yaml', + ref='master', + inputs={'pr_number': '12345', 'head_sha': 'deadbeef'}, + return_run_details=True, + ) + assert 'PR #12345' in result.output + assert 'feature-branch' in result.output + assert 'deadbeef' in result.output + assert RUN_DETAILS['html_url'] in result.output + assert 'Recent runs' not in result.output + assert 'query=event%3Aworkflow_dispatch' not in result.output + + +def test_promote_invalid_pr_url_aborts(ddev): + result = ddev('dep', 'promote', 'https://example.invalid/not-a-pr') + + assert result.exit_code != 0 + assert 'Could not extract a PR number' in result.output + + +def test_promote_aborts_when_no_run_details_returned(ddev, mocker): + mocker.patch('ddev.utils.github.GitHubManager.get_pr_head', return_value=('deadbeef', 'feature-branch')) + mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow', return_value=None) + + result = ddev('dep', 'promote', 'https://github.com/DataDog/integrations-core/pull/12345') + + assert result.exit_code != 0 + assert 'no run details were returned' in result.output + assert 'Promote workflow dispatched' not in result.output + + +def 
test_promote_suppresses_httpx_logs_and_restores_level(ddev, mocker, httpx_at_debug): + captured_levels = [] + + def capture_level(*_args, **_kwargs): + captured_levels.append(httpx_at_debug.level) + return ('deadbeef', 'feature-branch') + + mocker.patch('ddev.utils.github.GitHubManager.get_pr_head', side_effect=capture_level) + mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow', return_value=RUN_DETAILS) + + result = ddev('dep', 'promote', 'https://github.com/DataDog/integrations-core/pull/12345') + + assert result.exit_code == 0, result.output + assert captured_levels == [logging.WARNING] + assert httpx_at_debug.level == logging.DEBUG + + +def test_promote_restores_httpx_log_level_on_failure(ddev, mocker, httpx_at_debug): + """Ensure the finally branch restores the previous httpx logger level even when an API call raises.""" + mocker.patch('ddev.utils.github.GitHubManager.get_pr_head', side_effect=RuntimeError('boom')) + mocker.patch('ddev.utils.github.GitHubManager.dispatch_workflow') + + with pytest.raises(RuntimeError, match='boom'): + ddev('dep', 'promote', 'https://github.com/DataDog/integrations-core/pull/12345') + + assert httpx_at_debug.level == logging.DEBUG diff --git a/ddev/tests/utils/test_github.py b/ddev/tests/utils/test_github.py index 59b67d6a97fa5..e1f103dadcb08 100644 --- a/ddev/tests/utils/test_github.py +++ b/ddev/tests/utils/test_github.py @@ -1,6 +1,8 @@ # (C) Datadog, Inc. 
2023-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import json + import pytest from ddev.utils.github import PullRequest @@ -83,3 +85,46 @@ def test_create_label(self, network_replay, github_manager): assert label.json()['name'] == 'my_custom_label' assert label.json()['color'] == 'ff0000' + + +def test_dispatch_workflow_default_returns_none(github_manager, mocker): + """Default dispatch_workflow keeps the prior fire-and-forget behavior.""" + response = mocker.MagicMock() + api_post = mocker.patch('ddev.utils.github.GitHubManager._GitHubManager__api_post', return_value=response) + + result = github_manager.dispatch_workflow( + workflow_id='example.yaml', + ref='master', + inputs={'pr_number': '123', 'head_sha': 'deadbeef'}, + ) + + assert result is None + api_post.assert_called_once() + payload = json.loads(api_post.call_args.kwargs['content']) + assert payload == {'ref': 'master', 'inputs': {'pr_number': '123', 'head_sha': 'deadbeef'}} + assert 'return_run_details' not in payload + + +def test_dispatch_workflow_return_run_details_sends_flag_and_returns_json(github_manager, mocker): + """When return_run_details is true, the payload includes the flag and the parsed JSON is returned.""" + run_details = { + 'workflow_run_id': 42, + 'run_url': 'https://api.github.com/repos/o/r/actions/runs/42', + 'html_url': 'https://github.com/o/r/actions/runs/42', + } + response = mocker.MagicMock() + response.json.return_value = run_details + api_post = mocker.patch('ddev.utils.github.GitHubManager._GitHubManager__api_post', return_value=response) + + result = github_manager.dispatch_workflow( + workflow_id='example.yaml', + ref='master', + inputs={'pr_number': '123', 'head_sha': 'deadbeef'}, + return_run_details=True, + ) + + assert result == run_details + payload = json.loads(api_post.call_args.kwargs['content']) + assert payload['return_run_details'] is True + assert payload['ref'] == 'master' + assert payload['inputs'] == 
{'pr_number': '123', 'head_sha': 'deadbeef'} diff --git a/kubernetes/assets/monitors/monitor_deployments_replicas.json b/kubernetes/assets/monitors/monitor_deployments_replicas.json index 39b6fb9816c5f..374a02aae8603 100644 --- a/kubernetes/assets/monitors/monitor_deployments_replicas.json +++ b/kubernetes/assets/monitors/monitor_deployments_replicas.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-07-28", - "last_updated_at": "2025-06-12", + "last_updated_at": "2026-04-09", "title": "Kubernetes Deployment Replicas are failing", "tags": [ "integration:kubernetes" ], "description": "Kubernetes replicas are clones that facilitate self-healing for pods. Each pod has a desired number of replica Pods that should be running at any given time. This monitor tracks the number of replicas that are failing per deployment.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nThere are at least 2 or more missing replicas for Deployment {{kube_namespace.name}}/{{kube_deployment.name}} over the last 15 minutes.\n\n{{/is_alert}}", + "message": "{{#is_alert}}\n\n## What's happening?\nThere are at least 2 or more missing replicas for Deployment {{kube_namespace.name}}/{{kube_deployment.name}} over the last 15 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+kube_deployment:{{kube_deployment.name}}+kube_namespace:{{kube_namespace.name}})\n- [Metrics Explorer (kubernetes_state.deployment.replicas_desired)](/metric/explorer?exp_metric=kubernetes_state.deployment.replicas_desired&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_deployment:{{kube_deployment.name}},kube_namespace:{{kube_namespace.name}}&exp_agg=avg&exp_type=line)\n- [Metrics Explorer 
(kubernetes_state.deployment.replicas_available)](/metric/explorer?exp_metric=kubernetes_state.deployment.replicas_available&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_deployment:{{kube_deployment.name}},kube_namespace:{{kube_namespace.name}}&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}", "name": "[Kubernetes] Monitor Kubernetes Deployments Replica Pods", "options": { "escalation_message": "", diff --git a/kubernetes/assets/monitors/monitor_node_unavailable.json b/kubernetes/assets/monitors/monitor_node_unavailable.json index 37ff9c574dcec..cc57835121156 100644 --- a/kubernetes/assets/monitors/monitor_node_unavailable.json +++ b/kubernetes/assets/monitors/monitor_node_unavailable.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-07-28", - "last_updated_at": "2025-06-12", + "last_updated_at": "2026-04-09", "title": "Nodes are unavailable", "tags": [ "integration:kubernetes" ], "description": "Kubernetes nodes can either be schedulable or unschedulable. When unschedulable, the node prevents the scheduler from placing new pods onto that node. 
This monitor tracks the percentage of schedulable nodes.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nThe percentage of schedulable nodes is below 80% for status:schedulable on ({{kube_cluster_name.name}} cluster over the last 15 minutes.\n\n{{/is_alert}}\n\n Keep in mind that this might be expected based on your infrastructure.", + "message": "{{#is_alert}}\n\n## What's happening?\nThe percentage of schedulable nodes is below 80% for status:schedulable on ({{kube_cluster_name.name}} cluster over the last 15 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+status:schedulable)\n- [Hosts](/infrastructure/hosts?scope=kube_cluster_name:{{kube_cluster_name.name}})\n- [Metrics Explorer (kubernetes_state.node.status)](/metric/explorer?exp_metric=kubernetes_state.node.status&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},status:schedulable&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}\n\n Keep in mind that this might be expected based on your infrastructure.", "name": "[Kubernetes] Monitor Unschedulable Kubernetes Nodes", "options": { "escalation_message": "", diff --git a/kubernetes/assets/monitors/monitor_pod_crashloopbackoff.json b/kubernetes/assets/monitors/monitor_pod_crashloopbackoff.json index 1b14f874c716a..317eec3fd0032 100644 --- a/kubernetes/assets/monitors/monitor_pod_crashloopbackoff.json +++ b/kubernetes/assets/monitors/monitor_pod_crashloopbackoff.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-07-28", - "last_updated_at": "2025-06-12", + "last_updated_at": "2026-04-09", "title": "Pod is in a CrashloopBackOff state", "tags": [ "integration:kubernetes" ], "description": "The status CrashloopBackOff means that a container in the Pod is started, crashes, and is restarted, over and over again. 
This monitor tracks when a pod is in a CrashloopBackOff state for your Kubernetes integration.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nAt least one container in pod {{pod_name.name}} on {{kube_namespace.name}} is in a waiting state due to reason crashloopbackoff in the last 10 minutes.\n\n{{/is_alert}}\n\n This alert could generate several alerts for a bad deployment. Adjust the thresholds of the query to suit your infrastructure.", + "message": "{{#is_alert}}\n\n## What's happening?\nAt least one container in pod {{pod_name.name}} on {{kube_namespace.name}} is in a waiting state due to reason crashloopbackoff in the last 10 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+kube_namespace:{{kube_namespace.name}}+pod_name:{{pod_name.name}}+reason:crashloopbackoff)\n- [Pod Explorer](/orchestration/explorer/pod?query={{pod_name.name}})\n- [Metrics Explorer (kubernetes_state.container.status_report.count.waiting)](/metric/explorer?exp_metric=kubernetes_state.container.status_report.count.waiting&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_namespace:{{kube_namespace.name}},pod_name:{{pod_name.name}},reason:crashloopbackoff&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}\n\n This alert could generate several alerts for a bad deployment. 
Adjust the thresholds of the query to suit your infrastructure.", "name": "[Kubernetes] Pod {{pod_name.name}} is CrashloopBackOff on namespace {{kube_namespace.name}}", "options": { "escalation_message": "", diff --git a/kubernetes/assets/monitors/monitor_pod_imagepullbackoff.json b/kubernetes/assets/monitors/monitor_pod_imagepullbackoff.json index 07f30a6eb7b44..a42c9be57e11d 100644 --- a/kubernetes/assets/monitors/monitor_pod_imagepullbackoff.json +++ b/kubernetes/assets/monitors/monitor_pod_imagepullbackoff.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-09-15", - "last_updated_at": "2025-06-12", + "last_updated_at": "2026-04-09", "title": "Pod is in an ImagePullBackOff state", "tags": [ "integration:kubernetes" ], "description": "The status ImagePullBackOff means that a container could not start because Kubernetes could not pull a container image. This monitor tracks when a pod is in an ImagePullBackOff state for your Kubernetes integration.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nAt least one container in pod {{pod_name.name}} on namespace {{kube_namespace.name}} is in a waiting state due to an ImagePullBackOff error in the last 10 minutes.\n\n{{/is_alert}}\n\n This could happen for several reasons, for example a bad image path or tag or if the credentials for pulling images are not configured properly.", + "message": "{{#is_alert}}\n\n## What's happening?\nAt least one container in pod {{pod_name.name}} on namespace {{kube_namespace.name}} is in a waiting state due to an ImagePullBackOff error in the last 10 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+kube_namespace:{{kube_namespace.name}}+pod_name:{{pod_name.name}}+reason:imagepullbackoff)\n- [Pod Explorer](/orchestration/explorer/pod?query={{pod_name.name}})\n- [Metrics Explorer 
(kubernetes_state.container.status_report.count.waiting)](/metric/explorer?exp_metric=kubernetes_state.container.status_report.count.waiting&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_namespace:{{kube_namespace.name}},pod_name:{{pod_name.name}},reason:imagepullbackoff&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}\n\n This could happen for several reasons, for example a bad image path or tag or if the credentials for pulling images are not configured properly.", "name": "[Kubernetes] Pod {{pod_name.name}} is ImagePullBackOff on namespace {{kube_namespace.name}}", "options": { "escalation_message": "", diff --git a/kubernetes/assets/monitors/monitor_pod_oomkilled.json b/kubernetes/assets/monitors/monitor_pod_oomkilled.json index 3eece4a5d9e41..e4f7ad7aa755c 100644 --- a/kubernetes/assets/monitors/monitor_pod_oomkilled.json +++ b/kubernetes/assets/monitors/monitor_pod_oomkilled.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2025-09-15", - "last_updated_at": "2025-09-15", + "last_updated_at": "2026-04-09", "title": "Pod is in an OOMKilled state", "tags": [ "integration:kubernetes" ], "description": "The status OOMKilled means that a container was killed because it exceeded memory limits or the node ran out of available memory. 
This monitor tracks when a pod is in an OOMKilled state for your Kubernetes integration.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nThere has been at least one container terminated in pod {{pod_name.name}} on namespace {{kube_namespace.name}} with reason oomkilled in the last 10 minutes.\n\n{{/is_alert}}\n\n This could happen for several reasons, for example insufficient memory limits, memory leaks in the application, or the node running out of available memory.", + "message": "{{#is_alert}}\n\n## What's happening?\nThere has been at least one container terminated in pod {{pod_name.name}} on namespace {{kube_namespace.name}} with reason oomkilled in the last 10 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+kube_namespace:{{kube_namespace.name}}+pod_name:{{pod_name.name}}+reason:oomkilled)\n- [Pod Explorer](/orchestration/explorer/pod?query={{pod_name.name}})\n- [Metrics Explorer (kubernetes.containers.state.terminated)](/metric/explorer?exp_metric=kubernetes.containers.state.terminated&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_namespace:{{kube_namespace.name}},pod_name:{{pod_name.name}},reason:oomkilled&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}\n\n This could happen for several reasons, for example insufficient memory limits, memory leaks in the application, or the node running out of available memory.", "name": "[Kubernetes] Pod {{pod_name.name}} is OOMKilled on namespace {{kube_namespace.name}}", "options": { "escalation_message": "", diff --git a/kubernetes/assets/monitors/monitor_pods_failed_state.json b/kubernetes/assets/monitors/monitor_pods_failed_state.json index 708a41da74ee4..33ee1b348348e 100644 --- a/kubernetes/assets/monitors/monitor_pods_failed_state.json +++ b/kubernetes/assets/monitors/monitor_pods_failed_state.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-07-28", - "last_updated_at": "2025-06-12", + "last_updated_at": "2026-04-09", 
"title": "Pods are failing", "tags": [ "integration:kubernetes" ], "description": "When a pod is failing it means the container either exited with non-zero status or was terminated by the system. This monitor tracks when more than 10 pods are failing for a given Kubernetes cluster.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nThe number of failed pods has increased by more than 10 in ({{kube_cluster_name.name}} cluster in the last 5 minutes.\n\n{{/is_alert}}\n\n The threshold of ten pods varies depending on your infrastructure. Change the threshold to suit your needs.", + "message": "{{#is_alert}}\n\n## What's happening?\nThe number of failed pods has increased by more than 10 in ({{kube_cluster_name.name}} cluster in the last 5 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+kube_namespace:{{kube_namespace.name}}+pod_phase:failed)\n- [Metrics Explorer (kubernetes_state.pod.status_phase)](/metric/explorer?exp_metric=kubernetes_state.pod.status_phase&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_namespace:{{kube_namespace.name}},pod_phase:failed&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}\n\n The threshold of ten pods varies depending on your infrastructure. Change the threshold to suit your needs.", "name": "[Kubernetes] Monitor Kubernetes Failed Pods in Namespaces", "options": { "escalation_message": "", diff --git a/kubernetes/assets/monitors/monitor_pods_restarting.json b/kubernetes/assets/monitors/monitor_pods_restarting.json index f35d90c629c09..c7cccced75755 100644 --- a/kubernetes/assets/monitors/monitor_pods_restarting.json +++ b/kubernetes/assets/monitors/monitor_pods_restarting.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-07-28", - "last_updated_at": "2025-06-12", + "last_updated_at": "2026-04-09", "title": "Pods are restarting", "tags": [ "integration:kubernetes" ], "description": "Kubernetes pods restart according to the restart policy. 
A restarting container can indicate problems with memory, CPU usage, or an application exiting prematurely. This monitor tracks when pods are restarting multiple times.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nThere has been an increase of more than 5 container restarts in the pod {{pod_name.name}} in the last 5 minutes.\n\n{{/is_alert}}", + "message": "{{#is_alert}}\n\n## What's happening?\nThere has been an increase of more than 5 container restarts in the pod {{pod_name.name}} in the last 5 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+pod_name:{{pod_name.name}})\n- [Pod Explorer](/orchestration/explorer/pod?query={{pod_name.name}})\n- [Metrics Explorer (kubernetes.containers.restarts)](/metric/explorer?exp_metric=kubernetes.containers.restarts&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},pod_name:{{pod_name.name}}&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}", "name": "[Kubernetes] Monitor Kubernetes Pods Restarting", "options": { "escalation_message": "", diff --git a/kubernetes/assets/monitors/monitor_statefulset_replicas.json b/kubernetes/assets/monitors/monitor_statefulset_replicas.json index b0954fe2785bb..ef5fbc979d832 100644 --- a/kubernetes/assets/monitors/monitor_statefulset_replicas.json +++ b/kubernetes/assets/monitors/monitor_statefulset_replicas.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-07-28", - "last_updated_at": "2025-06-12", + "last_updated_at": "2026-04-09", "title": "Kubernetes Statefulset Replicas are failing", "tags": [ "integration:kubernetes" ], "description": "Kubernetes replicas are clones that facilitate self-healing for pods. Each pod has a desired number of replica Pods that should be running at any given time. 
This monitor tracks when the number of replicas per statefulset is falling.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nThere are at least 2 desired replicas that are not ready for {{kube_namespace.name}}/{{kube_stateful_set.name}} StatefulSet over the last 15 minutes.\n\n{{/is_alert}}\n\n This might present an unsafe situation for any further manual operations, such as killing other pods.", + "message": "{{#is_alert}}\n\n## What's happening?\nThere are at least 2 desired replicas that are not ready for {{kube_namespace.name}}/{{kube_stateful_set.name}} StatefulSet over the last 15 minutes.\n\n## Related Links\n\n- [Logs](/logs?query=kube_cluster_name:{{kube_cluster_name.name}}+kube_namespace:{{kube_namespace.name}}+kube_stateful_set:{{kube_stateful_set.name}})\n- [Metrics Explorer (kubernetes_state.statefulset.replicas_desired)](/metric/explorer?exp_metric=kubernetes_state.statefulset.replicas_desired&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_namespace:{{kube_namespace.name}},kube_stateful_set:{{kube_stateful_set.name}}&exp_agg=avg&exp_type=line)\n- [Metrics Explorer (kubernetes_state.statefulset.replicas_ready)](/metric/explorer?exp_metric=kubernetes_state.statefulset.replicas_ready&exp_scope=kube_cluster_name:{{kube_cluster_name.name}},kube_namespace:{{kube_namespace.name}},kube_stateful_set:{{kube_stateful_set.name}}&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}\n\n This might present an unsafe situation for any further manual operations, such as killing other pods.", "name": "[Kubernetes] Monitor Kubernetes Statefulset Replicas", "options": { "escalation_message": "", diff --git a/nginx/assets/monitors/4xx.json b/nginx/assets/monitors/4xx.json index 17fbd04888321..e38ee3436c7e3 100644 --- a/nginx/assets/monitors/4xx.json +++ b/nginx/assets/monitors/4xx.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-09-16", - "last_updated_at": "2026-03-09", + "last_updated_at": "2026-04-09", "title": "Upstream 4xx errors 
are high", "tags": [ "integration:nginx" ], "description": "NGINX sends requests to upstream peers that can fail eventually. This monitor tracks the count of 4xx HTTP responses to identify issues in the communication between NGINX and the backend servers.", "definition": { - "message": "{{#is_alert}}\n## 🚨 What's happening\n\nAn anomaly has been detected in the number of 4xx HTTP responses from NGINX upstream **{{upstream.name}}** (anomaly score: `{{value}}`, threshold: `{{threshold}}`). The 4xx response rate is significantly higher than normal, indicating that a notable portion of incoming requests are being rejected with client-side error codes.\n\nFirst triggered at **{{first_triggered_at}}**, active for **{{triggered_duration_sec}}** seconds.\n{{/is_alert}}{{#is_recovery}}\n## ✅ Recovered\n\nThe 4xx anomaly for upstream **{{upstream.name}}** has resolved. Current value: `{{value}}`.\n{{/is_recovery}}\n{{^is_recovery}}\n***\n\n## 📈 Impact\n\nElevated 4xx error rates can result in failed requests for end users and may expose misconfigurations or broken routes. Services and clients relying on this NGINX upstream may experience partial or complete degradation of functionality.\n\n***\n\n## Runbook\n\n### Initial Troubleshooting Steps\n\n1. **Identify the affected upstream** from the alert (`{{upstream.name}}`).\n2. Open [**Metrics Explorer**](/metric/explorer) and inspect `nginx.upstream.peers.responses.4xx` broken down by `upstream`.\n3. Review NGINX access logs for specific endpoints and status codes:\n ```bash\n tail -f /var/log/nginx/access.log | grep \" 4[0-9][0-9] \"\n ```\n4. Correlate the spike with recent configuration changes, upstream deployments, or traffic shifts.\n\n### Cause and Resolution\n\n| Cause | Resolution |\n| ----- | ---------- |\n| Invalid or removed request paths (404) | Verify routes in NGINX configuration; update upstream routing rules to reflect the current backend state.
|\n| Authentication or authorization failures (401/403) | Review auth configuration; check if credentials or access tokens have expired or been revoked. |\n| Malformed client requests (400) | Inspect incoming request headers and payloads; check client-side request construction. |\n| Rate limiting triggered (429) | Review rate limit thresholds; consider scaling upstream services or relaxing limits. |\n| Upstream endpoints renamed or removed | Update NGINX upstream configuration to reflect the current backend service endpoints. |\n\n### Related links\n\n* [Documentation](https://docs.datadoghq.com/integrations/nginx/)\n* [Metrics Explorer](/metric/explorer)\n* [Log Explorer](/logs?query=source%3Anginx)\n\n### Who should be notified?\n\nAssign the appropriate notification handle for this alert (e.g., `@slack-infra`, `@pagerduty-nginx`):\n`@your-team-handle`\n{{/is_recovery}}", + "message": "{{#is_alert}}\n## 🚨 What's happening\n\nAn anomaly has been detected in the number of 4xx HTTP responses from NGINX upstream **{{upstream.name}}** (anomaly score: `{{value}}`, threshold: `{{threshold}}`). The 4xx response rate is significantly higher than normal, indicating that a notable portion of incoming requests are being rejected with client-side error codes.\n\nFirst triggered at **{{first_triggered_at}}**, active for **{{triggered_duration_sec}}** seconds.\n{{/is_alert}}{{#is_recovery}}\n## ✅ Recovered\n\nThe 4xx anomaly for upstream **{{upstream.name}}** has resolved. Current value: `{{value}}`.\n{{/is_recovery}}\n{{^is_recovery}}\n***\n\n## 📈 Impact\n\nElevated 4xx error rates can result in failed requests for end users and may expose misconfigurations or broken routes. Services and clients relying on this NGINX upstream may experience partial or complete degradation of functionality.\n\n***\n\n## Runbook\n\n### Initial Troubleshooting Steps\n\n1. **Identify the affected upstream** from the alert (`{{upstream.name}}`).\n2. 
Open [**Metrics Explorer**](/metric/explorer) and inspect `nginx.upstream.peers.responses.4xx` broken down by `upstream`.\n3. Review NGINX access logs for specific endpoints and status codes:\n ```bash\n tail -f /var/log/nginx/access.log | grep \" 4[0-9][0-9] \"\n ```\n4. Correlate the spike with recent configuration changes, upstream deployments, or traffic shifts.\n\n### Cause and Resolution\n\n| Cause | Resolution |\n| ----- | ---------- |\n| Invalid or removed request paths (404) | Verify routes in NGINX configuration; update upstream routing rules to reflect the current backend state. |\n| Authentication or authorization failures (401/403) | Review auth configuration; check if credentials or access tokens have expired or been revoked. |\n| Malformed client requests (400) | Inspect incoming request headers and payloads; check client-side request construction. |\n| Rate limiting triggered (429) | Review rate limit thresholds; consider scaling upstream services or relaxing limits. |\n| Upstream endpoints renamed or removed | Update NGINX upstream configuration to reflect the current backend service endpoints. 
|\n\n### Related links\n\n* [Documentation](https://docs.datadoghq.com/integrations/nginx/)\n* [Logs](/logs?query=upstream:{{upstream.name}})\n* [Metrics Explorer (nginx.upstream.peers.responses.4xx)](/metric/explorer?exp_metric=nginx.upstream.peers.responses.4xx&exp_scope=upstream:{{upstream.name}}&exp_agg=avg&exp_type=line)\n\n### Who should be notified?\n\nAssign the appropriate notification handle for this alert (e.g., `@slack-infra`, `@pagerduty-nginx`):\n`@your-team-handle`\n{{/is_recovery}}", "name": "[NGINX] 4xx Errors higher than usual", "options": { "escalation_message": "", diff --git a/nginx/assets/monitors/5xx.json b/nginx/assets/monitors/5xx.json index c7b9ef7201dbc..b98d0bf985336 100644 --- a/nginx/assets/monitors/5xx.json +++ b/nginx/assets/monitors/5xx.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-09-16", - "last_updated_at": "2026-03-09", + "last_updated_at": "2026-04-09", "title": "Upstream 5xx errors are high", "tags": [ "integration:nginx" ], "description": "“5xx upstream request errors” are indicating server issues from backend servers. This monitor tracks the count of 5xx responses from NGINX's upstream peers to identify server-related issues in your web or application infrastructure.", "definition": { - "message": "{{#is_alert}}\n## 🚨 What's happening\n\nAn anomaly has been detected in the number of 5xx HTTP responses from NGINX upstream **{{upstream.name}}** (anomaly score: `{{value}}`, threshold: `{{threshold}}`). The 5xx error rate is significantly higher than normal, indicating that backend servers are failing to handle a notable portion of requests.\n\nFirst triggered at **{{first_triggered_at}}**, active for **{{triggered_duration_sec}}** seconds.\n{{/is_alert}}{{#is_recovery}}\n## ✅ Recovered\n\nThe 5xx anomaly for upstream **{{upstream.name}}** has resolved. 
Current value: `{{value}}`.\n{{/is_recovery}}\n{{^is_recovery}}\n***\n\n## 📈 Impact\n\n5xx errors indicate server-side failures that cause direct service disruptions for users. Dependent services that rely on successful responses from this NGINX upstream may experience cascading failures or degraded functionality.\n\n***\n\n## Runbook\n\n### Initial Troubleshooting Steps\n\n1. **Identify the affected upstream** from the alert (`{{upstream.name}}`).\n2. Open [**Metrics Explorer**](/metric/explorer) and inspect `nginx.upstream.peers.responses.5xx` broken down by `upstream`.\n3. Review NGINX error logs for connection failures or backend errors:\n ```bash\n tail -f /var/log/nginx/error.log\n ```\n4. Check upstream backend service health and application logs.\n5. Correlate the spike with recent deployments or infrastructure changes.\n\n### Cause and Resolution\n\n| Cause | Resolution |\n| ----- | ---------- |\n| Backend server is down or crashed (502) | Verify the upstream service is running; restart the service if needed and check its logs. |\n| Gateway timeout due to slow upstream (504) | Check upstream response times; increase `proxy_read_timeout` if the upstream is legitimately slow. |\n| Application-level errors (500) | Inspect upstream application logs for unhandled exceptions or crashes; roll back recent deployments if correlated. |\n| Service unavailable due to overload (503) | Check upstream server resource utilization; scale out or enable load balancing across more peers. |\n| Resource exhaustion on upstream servers | Review CPU, memory, and connection pool usage on the backend; tune resource limits and autoscaling. 
|\n\n### Related links\n\n* [Documentation](https://docs.datadoghq.com/integrations/nginx/)\n* [Metrics Explorer](/metric/explorer)\n* [Log Explorer](/logs?query=source%3Anginx)\n\n### Who should be notified?\n\nAssign the appropriate notification handle for this alert (e.g., `@slack-infra`, `@pagerduty-nginx`):\n`@your-team-handle`\n{{/is_recovery}}", + "message": "{{#is_alert}}\n## 🚨 What's happening\n\nAn anomaly has been detected in the number of 5xx HTTP responses from NGINX upstream **{{upstream.name}}** (anomaly score: `{{value}}`, threshold: `{{threshold}}`). The 5xx error rate is significantly higher than normal, indicating that backend servers are failing to handle a notable portion of requests.\n\nFirst triggered at **{{first_triggered_at}}**, active for **{{triggered_duration_sec}}** seconds.\n{{/is_alert}}{{#is_recovery}}\n## ✅ Recovered\n\nThe 5xx anomaly for upstream **{{upstream.name}}** has resolved. Current value: `{{value}}`.\n{{/is_recovery}}\n{{^is_recovery}}\n***\n\n## 📈 Impact\n\n5xx errors indicate server-side failures that cause direct service disruptions for users. Dependent services that rely on successful responses from this NGINX upstream may experience cascading failures or degraded functionality.\n\n***\n\n## Runbook\n\n### Initial Troubleshooting Steps\n\n1. **Identify the affected upstream** from the alert (`{{upstream.name}}`).\n2. Open [**Metrics Explorer**](/metric/explorer) and inspect `nginx.upstream.peers.responses.5xx` broken down by `upstream`.\n3. Review NGINX error logs for connection failures or backend errors:\n ```bash\n tail -f /var/log/nginx/error.log\n ```\n4. Check upstream backend service health and application logs.\n5. Correlate the spike with recent deployments or infrastructure changes.\n\n### Cause and Resolution\n\n| Cause | Resolution |\n| ----- | ---------- |\n| Backend server is down or crashed (502) | Verify the upstream service is running; restart the service if needed and check its logs. 
|\n| Gateway timeout due to slow upstream (504) | Check upstream response times; increase `proxy_read_timeout` if the upstream is legitimately slow. |\n| Application-level errors (500) | Inspect upstream application logs for unhandled exceptions or crashes; roll back recent deployments if correlated. |\n| Service unavailable due to overload (503) | Check upstream server resource utilization; scale out or enable load balancing across more peers. |\n| Resource exhaustion on upstream servers | Review CPU, memory, and connection pool usage on the backend; tune resource limits and autoscaling. |\n\n### Related links\n\n* [Documentation](https://docs.datadoghq.com/integrations/nginx/)\n* [Logs](/logs?query=upstream:{{upstream.name}})\n* [Metrics Explorer (nginx.upstream.peers.responses.5xx)](/metric/explorer?exp_metric=nginx.upstream.peers.responses.5xx&exp_scope=upstream:{{upstream.name}}&exp_agg=avg&exp_type=line)\n\n### Who should be notified?\n\nAssign the appropriate notification handle for this alert (e.g., `@slack-infra`, `@pagerduty-nginx`):\n`@your-team-handle`\n{{/is_recovery}}", "name": "[NGINX] 5xx Errors higher than usual", "options": { "escalation_message": "", diff --git a/nginx/assets/monitors/upstream_peer_fails.json b/nginx/assets/monitors/upstream_peer_fails.json index 08cef0b5647dd..d8f4b0fb003d1 100644 --- a/nginx/assets/monitors/upstream_peer_fails.json +++ b/nginx/assets/monitors/upstream_peer_fails.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2020-09-16", - "last_updated_at": "2026-03-09", + "last_updated_at": "2026-04-09", "title": "Upstream peers are failing", "tags": [ "integration:nginx" ], "description": "NGINX can be configured to distribute incoming client requests to multiple upstream peers (individual web servers, application servers, or other backend services). 
This monitor tracks anomalies in the number of failed upstream peers to identify issues.", "definition": { - "message": "{{#is_alert}}\n## 🚨 What's happening\n\nAn anomaly has been detected in the number of upstream peer communication failures for **{{upstream.name}}** (anomaly score: `{{value}}`, threshold: `{{threshold}}`). NGINX is experiencing an unusual number of unsuccessful attempts to connect to or communicate with one or more backend servers.\n\nFirst triggered at **{{first_triggered_at}}**, active for **{{triggered_duration_sec}}** seconds.\n{{/is_alert}}{{#is_recovery}}\n## ✅ Recovered\n\nUpstream peer failures for **{{upstream.name}}** have resolved. Current value: `{{value}}`.\n{{/is_recovery}}\n{{^is_recovery}}\n***\n\n## 📈 Impact\n\nUpstream peer failures reduce the pool of available backend servers, increasing load on healthy peers. Users may experience intermittent errors or increased response times as NGINX retries or routes traffic around failed peers.\n\n***\n\n## Runbook\n\n### Initial Troubleshooting Steps\n\n1. **Identify the affected upstream** from the alert (`{{upstream.name}}`).\n2. Open [**Metrics Explorer**](/metric/explorer) and inspect `nginx.stream.upstream.peers.fails` broken down by `upstream` to identify which specific peers are failing.\n3. Review NGINX error logs for connection-level failures:\n ```bash\n tail -f /var/log/nginx/error.log | grep \"upstream\"\n ```\n4. Test connectivity from the NGINX host to the failing upstream servers:\n ```bash\n curl -v http://<upstream-host>:<port>/health\n ```\n5. Correlate with recent configuration changes or upstream service deployments.\n\n### Cause and Resolution\n\n| Cause | Resolution |\n| ----- | ---------- |\n| Upstream server is down or crashed | Verify the upstream service is running and listening on the expected port; restart if needed. |\n| Network connectivity issues | Test connectivity from the NGINX host to the upstream; check firewall rules and network routing. 
|\n| Upstream not responding within timeout | Review `proxy_connect_timeout` and `proxy_read_timeout` in NGINX config; increase if the upstream is legitimately slow. |\n| Misconfigured upstream address or port | Verify the upstream block in NGINX configuration has the correct server addresses and ports. |\n| Firewall or security group blocking traffic | Check security group rules and host-based firewall (iptables/nftables) on the upstream servers. |\n\n### Related links\n\n* [Documentation](https://docs.datadoghq.com/integrations/nginx/)\n* [Metrics Explorer](/metric/explorer)\n* [Log Explorer](/logs?query=source%3Anginx)\n\n### Who should be notified?\n\nAssign the appropriate notification handle for this alert (e.g., `@slack-infra`, `@pagerduty-nginx`):\n`@your-team-handle`\n{{/is_recovery}}", + "message": "{{#is_alert}}\n## 🚨 What's happening\n\nAn anomaly has been detected in the number of upstream peer communication failures for **{{upstream.name}}** (anomaly score: `{{value}}`, threshold: `{{threshold}}`). NGINX is experiencing an unusual number of unsuccessful attempts to connect to or communicate with one or more backend servers.\n\nFirst triggered at **{{first_triggered_at}}**, active for **{{triggered_duration_sec}}** seconds.\n{{/is_alert}}{{#is_recovery}}\n## ✅ Recovered\n\nUpstream peer failures for **{{upstream.name}}** have resolved. Current value: `{{value}}`.\n{{/is_recovery}}\n{{^is_recovery}}\n***\n\n## 📈 Impact\n\nUpstream peer failures reduce the pool of available backend servers, increasing load on healthy peers. Users may experience intermittent errors or increased response times as NGINX retries or routes traffic around failed peers.\n\n***\n\n## Runbook\n\n### Initial Troubleshooting Steps\n\n1. **Identify the affected upstream** from the alert (`{{upstream.name}}`).\n2. 
Open [**Metrics Explorer**](/metric/explorer) and inspect `nginx.stream.upstream.peers.fails` broken down by `upstream` to identify which specific peers are failing.\n3. Review NGINX error logs for connection-level failures:\n ```bash\n tail -f /var/log/nginx/error.log | grep \"upstream\"\n ```\n4. Test connectivity from the NGINX host to the failing upstream servers:\n ```bash\n curl -v http://<upstream-host>:<port>/health\n ```\n5. Correlate with recent configuration changes or upstream service deployments.\n\n### Cause and Resolution\n\n| Cause | Resolution |\n| ----- | ---------- |\n| Upstream server is down or crashed | Verify the upstream service is running and listening on the expected port; restart if needed. |\n| Network connectivity issues | Test connectivity from the NGINX host to the upstream; check firewall rules and network routing. |\n| Upstream not responding within timeout | Review `proxy_connect_timeout` and `proxy_read_timeout` in NGINX config; increase if the upstream is legitimately slow. |\n| Misconfigured upstream address or port | Verify the upstream block in NGINX configuration has the correct server addresses and ports. |\n| Firewall or security group blocking traffic | Check security group rules and host-based firewall (iptables/nftables) on the upstream servers. 
|\n\n### Related links\n\n* [Documentation](https://docs.datadoghq.com/integrations/nginx/)\n* [Logs](/logs?query=upstream:{{upstream.name}})\n* [Metrics Explorer (nginx.stream.upstream.peers.fails)](/metric/explorer?exp_metric=nginx.stream.upstream.peers.fails&exp_scope=upstream:{{upstream.name}}&exp_agg=avg&exp_type=line)\n\n### Who should be notified?\n\nAssign the appropriate notification handle for this alert (e.g., `@slack-infra`, `@pagerduty-nginx`):\n`@your-team-handle`\n{{/is_recovery}}", "name": "[NGINX] Upstream peers fails", "options": { "escalation_message": "", diff --git a/postgres/assets/monitors/percent_usage_connections.json b/postgres/assets/monitors/percent_usage_connections.json index bfa477bbe4e8a..01608f00c12bb 100644 --- a/postgres/assets/monitors/percent_usage_connections.json +++ b/postgres/assets/monitors/percent_usage_connections.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2021-03-17", - "last_updated_at": "2023-07-24", + "last_updated_at": "2026-04-09", "title": "Connection pool is reaching saturation point", "tags": [ "integration:postgres" ], "description": "In PostgreSQL, there is a limit of concurrent connections that can be increased. When this limit is exceeded, new users cannot establish a connection with the database. 
This monitor tracks the total number of connections.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nPostgreSQL connection usage on host {{host.name}} has exceeded 90% of the maximum allowed connections over the last 15 minutes.\n\n{{/is_alert}}", + "message": "{{#is_alert}}\n\n## What's happening?\nPostgreSQL connection usage on host {{host.name}} has exceeded 90% of the maximum allowed connections over the last 15 minutes.\n\n## Related Links\n\n- [Metrics Explorer (postgresql.percent_usage_connections)](/metric/explorer?exp_metric=postgresql.percent_usage_connections&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}", "name": "[Postgres] Number of connections is approaching connection limit on {{host.name}}", "options": { "escalation_message": "", diff --git a/postgres/assets/monitors/replication_delay.json b/postgres/assets/monitors/replication_delay.json index 889700af13e39..6ec45e4efe1c5 100644 --- a/postgres/assets/monitors/replication_delay.json +++ b/postgres/assets/monitors/replication_delay.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2021-02-16", - "last_updated_at": "2021-03-17", + "last_updated_at": "2026-04-09", "title": "Replication delay is high", "tags": [ "integration:postgres" ], "description": "Replication lag is the delay between the time when data is written to the primary database and the time when it is replicated to the standby databases. 
This monitor tracks the replication lag of the postgres database.", "definition": { - "message": "{{#is_alert}}\n\n## What's happening?\nAnomalies in replication delay on host {{host.name}} for PostgreSQL have been detected above the expected range within the past 15 minutes, over the last hour.\n\n{{/is_alert}}", + "message": "{{#is_alert}}\n\n## What's happening?\nAnomalies in replication delay on host {{host.name}} for PostgreSQL have been detected above the expected range within the past 15 minutes, over the last hour.\n\n## Related Links\n\n- [Metrics Explorer (postgresql.replication_delay)](/metric/explorer?exp_metric=postgresql.replication_delay&exp_agg=avg&exp_type=line)\n\n{{/is_alert}}", "name": "[Postgres] Replication delay is abnormally high on {{host.name}}", "options": { "escalation_message": "", diff --git a/redisdb/assets/monitors/high_mem.json b/redisdb/assets/monitors/high_mem.json index b230aa497a55d..a360246d126a6 100644 --- a/redisdb/assets/monitors/high_mem.json +++ b/redisdb/assets/monitors/high_mem.json @@ -1,14 +1,14 @@ { "version": 2, "created_at": "2021-02-08", - "last_updated_at": "2021-02-08", + "last_updated_at": "2026-04-09", "title": "Memory consumption is high", "tags": [ "integration:redis" ], "description": "Redis servers use RAM to store data and memory is a critical resource for its performance. 
This monitor tracks the percentage of used memory to avoid the risk of running out of memory, which can lead to performance issues.", "definition": { - "message": "## What's happening?\n{{#is_alert}}\nRedis memory usage has exceeded 90% of its allocated limit in the last 5 minutes with current value of {{value}}.\n{{/is_alert}} \n\n{{#is_warning}}\nRedis memory usage has exceeded 70% of its allocated limit in the last 5 minutes with current value of {{value}}.\n{{/is_warning}}", + "message": "## What's happening?\n{{#is_alert}}\nRedis memory usage has exceeded 90% of its allocated limit in the last 5 minutes with current value of {{value}}.\n{{/is_alert}} \n\n{{#is_warning}}\nRedis memory usage has exceeded 70% of its allocated limit in the last 5 minutes with current value of {{value}}.\n{{/is_warning}}\n\n## Related Links\n\n- [Metrics Explorer (redis.mem.used)](/metric/explorer?exp_metric=redis.mem.used&exp_agg=avg&exp_type=line)\n- [Metrics Explorer (redis.mem.maxmemory)](/metric/explorer?exp_metric=redis.mem.maxmemory&exp_agg=avg&exp_type=line)", "name": "[Redis] High memory consumption", "options": { "escalation_message": "",