Skip to content

Merge remote-tracking branch 'origin/main' into include-ci-data #35

Merge remote-tracking branch 'origin/main' into include-ci-data

Merge remote-tracking branch 'origin/main' into include-ci-data #35

Workflow file for this run

# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This CI runs on pull requests and uses testmon to skip tests whose
# dependencies have not changed relative to the nightly cache.
# The tests run here use uv only.
#
# ----------------------------------------------------------------------------
# Cache design (see .github/CACHE_CONTRACT.md):
#
# Each job restores the nightly's uv download cache fail-open, then
# rebuilds .venv from the committed lockfile via `uv sync --frozen`.
# Testmon and coverage baseline caches are restored by lockhash with a
# prefix fallback (best-effort; testmon handles stale DBs gracefully).
#
# No cross-run .venv cache: a pinned container + frozen lockfile +
# pinned uv version already make the sync deterministic, and caching
# the realized venv was not worth the complexity it cost.
# ----------------------------------------------------------------------------
# TODO: The coverage limit is very low because this is not using GPU tests or
# the data-driven tests. Raise it again eventually.
name: Pull Request Github CI

# Triggers: manual dispatch, plus any push to a branch whose name matches
# `pull-request/<number>`.
on:
  workflow_dispatch:
  push:
    branches:
      - "pull-request/[0-9]+"

permissions:
  contents: read
  actions: write
  checks: write

# Every `run:` step in this workflow executes under bash.
defaults:
  run:
    shell: bash

env:
  # ---- Container baseline identity ---------------------------------------
  # MUST match the nightly workflow so cache keys align. See
  # .github/CACHE_CONTRACT.md for the full design.
  PYTHON_VERSION: "3.12"
  UV_VERSION: "0.11.7"
  CONTAINER_ID: "cuda12.8.1-cudnn-devel-ubuntu24.04"
  EXTRAS_TAG: "cu12"

  # ---- Cache key prefixes (shared with nightly) --------------------------
  # The workflow-level `env` map cannot reference its own entries through
  # the `env` context, so the container / Python / uv identifiers are
  # spelled out literally here. Update them together with the values above.
  UV_CACHE_KEY_PREFIX: "uv-cache-nightly-cuda12.8.1-cudnn-devel-ubuntu24.04-py3.12-uv0.11.7"
  TESTMON_CACHE_KEY_PREFIX: "testmon-nightly"
  COVERAGE_CACHE_KEY_PREFIX: "coverage-nightly"
  JIT_CACHE_KEY_PREFIX: "jit-cache-cuda12.8.1-cudnn-devel-ubuntu24.04-py3.12"
  JIT_CACHE_DIR: "/root/.cache/jit"

  # ---- uv read-only defaults --------------------------------------------
  UV_FROZEN: "1"
  UV_NO_SYNC: "1"
  PYVISTA_OFF_SCREEN: "true"

jobs:
  # Stage 1: Run testmon tests
  #
  # Each GPU job sets up its own environment: restore the nightly's uv
  # download cache fail-open, then `uv sync --frozen --group dev --extra
  # cu12`. The sync is fast because the warm uv cache already has the
  # wheels. If the PR bumped uv.lock, any new wheels are downloaded on
  # demand and the cache is simply not quite as warm.
  testmon:
    name: Testmon
    runs-on: linux-amd64-gpu-h100-latest-1
    container:
      # Image tag corresponds to CONTAINER_ID in the workflow-level env;
      # keep the two in sync.
      image: nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v5

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          uv-version: ${{ env.UV_VERSION }}

      - name: Setup uv environment from cache
        uses: ./.github/actions/setup-uv-env
        with:
          uv-cache-key-prefix: ${{ env.UV_CACHE_KEY_PREFIX }}
          uv-cache-key-suffix: "latest"
          extras: ${{ env.EXTRAS_TAG }}

      # Restore the nightly testmon DB so testmon can skip unchanged tests.
      # Exact-match uses the same lockhash the nightly saved under.
      # For lock-changed PRs the exact key will miss; the prefix fallback
      # gives the most recent nightly DB, which is still useful (testmon
      # handles stale DBs gracefully by re-running tests whose dependency
      # hashes differ).
      - name: Restore testmon database from nightly cache
        uses: actions/cache/restore@v5
        with:
          path: |
            .testmondata
            .testmondata-shm
            .testmondata-wal
          key: ${{ env.TESTMON_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}
          restore-keys: |
            ${{ env.TESTMON_CACHE_KEY_PREFIX }}-

      # Exact-key restore only (no restore-keys fallback).
      - name: Restore JIT compilation cache from nightly
        uses: actions/cache/restore@v5
        with:
          path: ${{ env.JIT_CACHE_DIR }}
          key: ${{ env.JIT_CACHE_KEY_PREFIX }}-latest

      - name: Download CI test data
        uses: ./.github/actions/download-ci-data
        with:
          hf-token: ${{ secrets.HF_CI_DATA_TOKEN }}

      # The three cache variables point Warp, Triton, and TorchInductor at
      # subdirectories of the restored JIT cache directory.
      - name: Run core tests (with testmon)
        env:
          WARP_CACHE_PATH: ${{ env.JIT_CACHE_DIR }}/warp
          TRITON_CACHE_DIR: ${{ env.JIT_CACHE_DIR }}/triton
          TORCHINDUCTOR_CACHE_DIR: ${{ env.JIT_CACHE_DIR }}/inductor
        run: |
          uv run --no-sync python -m pytest --testmon --ignore-glob="*docs*" --ignore-glob="*examples*"

  # Stage 2: Run coverage tests and upload artifacts
  #
  # NOTE(review): no `needs:` is declared, so this job is scheduled
  # independently of `testmon` rather than after it - confirm that is
  # intended.
  coverage:
    name: Coverage
    runs-on: linux-amd64-gpu-h100-latest-1
    container:
      # Image tag corresponds to CONTAINER_ID in the workflow-level env;
      # keep the two in sync.
      image: nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v5

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          uv-version: ${{ env.UV_VERSION }}

      - name: Setup uv environment from cache
        uses: ./.github/actions/setup-uv-env
        with:
          uv-cache-key-prefix: ${{ env.UV_CACHE_KEY_PREFIX }}
          uv-cache-key-suffix: "latest"
          extras: ${{ env.EXTRAS_TAG }}

      # Same lockhash key with prefix fallback as in the testmon job.
      - name: Restore testmon database from nightly cache
        uses: actions/cache/restore@v5
        with:
          path: |
            .testmondata
            .testmondata-shm
            .testmondata-wal
          key: ${{ env.TESTMON_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}
          restore-keys: |
            ${{ env.TESTMON_CACHE_KEY_PREFIX }}-

      # Coverage data files from the nightly, keyed by lockhash with a
      # prefix fallback.
      - name: Restore nightly coverage baseline from cache
        id: cache-coverage-restore
        uses: actions/cache/restore@v5
        with:
          path: .coverage*
          key: ${{ env.COVERAGE_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}
          restore-keys: |
            ${{ env.COVERAGE_CACHE_KEY_PREFIX }}-

      # Exact-key restore only (no restore-keys fallback).
      - name: Restore JIT compilation cache from nightly
        uses: actions/cache/restore@v5
        with:
          path: ${{ env.JIT_CACHE_DIR }}
          key: ${{ env.JIT_CACHE_KEY_PREFIX }}-latest

      - name: Download CI test data
        uses: ./.github/actions/download-ci-data
        with:
          hf-token: ${{ secrets.HF_CI_DATA_TOKEN }}

      - name: Run core tests for coverage report (testmon-selected)
        env:
          WARP_CACHE_PATH: ${{ env.JIT_CACHE_DIR }}/warp
          TRITON_CACHE_DIR: ${{ env.JIT_CACHE_DIR }}/triton
          TORCHINDUCTOR_CACHE_DIR: ${{ env.JIT_CACHE_DIR }}/inductor
        run: |
          uv run --no-sync coverage run --rcfile='test/coverage.pytest.rc' -m pytest --testmon --ignore-glob="*docs*" --ignore-glob="*examples*"

      - name: Run doc tests (testmon not supported for doctests)
        env:
          WARP_CACHE_PATH: ${{ env.JIT_CACHE_DIR }}/warp
          TRITON_CACHE_DIR: ${{ env.JIT_CACHE_DIR }}/triton
          TORCHINDUCTOR_CACHE_DIR: ${{ env.JIT_CACHE_DIR }}/inductor
        run: |
          uv run --no-sync coverage run --rcfile='test/coverage.docstring.rc' -m pytest --doctest-modules physicsnemo/ --ignore-glob="*internal*" --ignore-glob="*experimental*"

      # Two deliberate details in this step:
      #  * The HTML and XML reports are rendered BEFORE `coverage report`,
      #    because `--fail-under` makes that command exit non-zero and the
      #    bash step stops at the first failing command. Rendering first
      #    keeps the reports available when the threshold is missed.
      #  * `coverage report` takes ONE comma-separated `--omit` list.
      #    Repeating the flag keeps only the last pattern, so the three
      #    patterns are passed together.
      - name: Merge coverage reports
        run: |
          echo "Coverage files to combine:"
          ls -la .coverage* 2>/dev/null || echo "No coverage files found"
          uv run --no-sync coverage combine
          uv run --no-sync coverage html
          uv run --no-sync coverage xml -o coverage.xml
          uv run --no-sync coverage report --show-missing --omit="*test*,*internal*,*experimental*" --fail-under=45

      # Upload even when an earlier step failed (for example the coverage
      # threshold), so the HTML report can be inspected; skipped only if
      # the run was cancelled.
      - name: Upload coverage HTML report
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report-pr
          path: htmlcov/
          retention-days: 7