diff --git a/.github/workflows/test-nvcr-pull.yml b/.github/workflows/test-nvcr-pull.yml
new file mode 100644
index 00000000000..04ffc83d4fb
--- /dev/null
+++ b/.github/workflows/test-nvcr-pull.yml
@@ -0,0 +1,80 @@
+# nvcr.io image pull + size — single-job smoke test.
+#
+# Pulls a specific nvcr.io image and reports its on-disk size. Does
+# not include `docker login` — relies on whatever credentials the
+# runner is configured with (the Packer-baked NGC pull-secret on
+# nv-gha-runners, if present). A failure with an auth error is
+# itself a useful signal that the runner is not pre-configured for
+# the target namespace.
+#
+# Triggers: same as `secret-scan.yml` — copy-pr-bot mirror branches
+# (`pull-request/[0-9]+`) on NVIDIA/cccl + workflow_dispatch.
+
+name: nvcr image pull + size
+
+run-name: nvcr pull — ${{ github.ref_name }}
+
+on:
+  push:
+    branches:
+      - "pull-request/[0-9]+"
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  pull-and-size:
+    name: docker pull + size
+    # NV self-hosted CPU runner on NVIDIA/cccl; GitHub-hosted fallback
+    # on contributor forks where nv-gha-runners labels do not resolve.
+    runs-on: ${{ github.repository == 'NVIDIA/cccl' && 'linux-amd64-cpu4' || 'ubuntu-latest' }}
+    # Cap stuck pulls / registry hangs so a wedged job doesn't sit on a
+    # self-hosted runner indefinitely. 15 min covers a multi-GB image
+    # pull on a busy runner with headroom; well under GitHub's default
+    # 6-hour job timeout.
+    timeout-minutes: 15
+    env:
+      NVCR_IMAGE: nvcr.io/nvidian/prodsec/pulse-trufflehog:1.33
+
+    steps:
+      - name: docker pull
+        run: docker pull "${NVCR_IMAGE}"
+
+      - name: Report size
+        run: |
+          set -euo pipefail
+          bytes=$(docker image inspect "${NVCR_IMAGE}" --format '{{.Size}}')
+          mib=$(awk -v b="${bytes}" 'BEGIN { printf "%.1f", b/1024/1024 }')
+          gib=$(awk -v b="${bytes}" 'BEGIN { printf "%.2f", b/1024/1024/1024 }')
+          echo "Image: ${NVCR_IMAGE}"
+          echo "Size: ${bytes} bytes (${mib} MiB / ${gib} GiB)"
+
+      - name: Cleanup
+        if: always()
+        # Only attempt removal if the image is actually present locally.
+        # If the pull failed (e.g. auth error on a private namespace),
+        # `docker rmi` would fail with "no such image" — that's expected,
+        # not a real disk-growth signal, so we skip silently. If the
+        # image IS present and `rmi` fails, that's a genuine problem on
+        # a self-hosted runner (leaks layers across runs) — surface as
+        # a warning and echo docker's own error text, so the run log
+        # shows why the removal failed.
+        run: |
+          set -euo pipefail
+          if ! docker image inspect "${NVCR_IMAGE}" >/dev/null 2>&1; then
+            echo "cleanup: image not present locally (likely pull failed); nothing to remove"
+            exit 0
+          fi
+          # Keep docker's output rather than discarding it, so a failed
+          # removal can be diagnosed from the log.
+          if rmi_output=$(docker rmi "${NVCR_IMAGE}" 2>&1); then
+            echo "cleanup: removed ${NVCR_IMAGE}"
+          else
+            echo "::warning::cleanup: failed to remove ${NVCR_IMAGE} from runner cache; manual cleanup may be needed to avoid disk growth on this runner"
+            printf 'cleanup: docker rmi output:\n%s\n' "${rmi_output}"
+          fi