Skip to content

Commit 0284fdd

Browse files
authored
run all recipes on a single node (#1108)
This PR does two things:

* We want changed-files to show the files changed in this PR, not necessarily all differences between ToT main and the current branch. The merge queue will make sure that these run at least once where ToT main is the merge base.
* We currently spend almost all of our CI time pulling the base image, since we have to pull the pytorch base image independently on every CI runner. This change uses a single runner and just uses docker run for each test suite, so we can re-use that image pull.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- Chores
  - CI now compares changes against a merge-base for more accurate diffs.
  - Consolidated unit tests into a single runner for simpler, faster execution.
  - Streamlined outputs and removed unnecessary per-directory and matrix steps.
- Tests
  - Switched to a slimmer container image for test runs.
  - Added pip caching to speed dependency installs.
  - Ensured a clean environment by unsetting constraint variables before installs.
  - Standardized test invocation and non-interactive Docker runs.
  - Improved logging to show cache location and key test context.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Peter St. John <pstjohn@nvidia.com>
1 parent 4bf8d14 commit 0284fdd

3 files changed

Lines changed: 63 additions & 47 deletions

File tree

.github/workflows/unit-tests-framework.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,18 @@ jobs:
3030
fetch-depth: 0
3131
submodules: "recursive"
3232

33+
- name: Get merge-base commit
34+
id: merge-base
35+
run: |
36+
# Get the merge-base between current branch and main
37+
MERGE_BASE=$(git merge-base HEAD origin/main)
38+
echo "merge-base=$MERGE_BASE" >> $GITHUB_OUTPUT
39+
echo "Merge-base commit: $MERGE_BASE"
40+
3341
- uses: step-security/changed-files@v46
3442
id: changed-files
3543
with:
36-
base_sha: main
44+
base_sha: ${{ steps.merge-base.outputs.merge-base }}
3745
files: |
3846
**
3947
!models/**
@@ -42,6 +50,7 @@ jobs:
4250
!.github/**
4351
!.gitignore
4452
!.devcontainer/**
53+
!ci/scripts/recipes_local_test.py
4554
.github/workflows/unit-tests-framework.yml
4655
4756
- name: Show output

.github/workflows/unit-tests-recipes.yml

Lines changed: 30 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,21 @@ jobs:
3535
with:
3636
fetch-depth: 0
3737

38+
- name: Get merge-base commit
39+
id: merge-base
40+
run: |
41+
# Get the merge-base between current branch and main
42+
MERGE_BASE=$(git merge-base HEAD origin/main)
43+
echo "merge-base=$MERGE_BASE" >> $GITHUB_OUTPUT
44+
echo "Merge-base commit: $MERGE_BASE"
45+
3846
- name: Get changed files
3947
id: changed-files
4048
uses: step-security/changed-files@v46
4149
with:
4250
json: true
4351
matrix: true
44-
base_sha: main
52+
base_sha: ${{ steps.merge-base.outputs.merge-base }}
4553
dir_names: true
4654
dir_names_max_depth: 2
4755
files: |
@@ -81,24 +89,15 @@ jobs:
8189
fi
8290
fi
8391
84-
# Assign Docker images to the selected directories
85-
# Currently, AMPLIFY is the only folder that needs a custom base image, since we have to support both TE and
86-
# xformers-based models for golden value testing. The rest of the models use the default pytorch image.
87-
DIRS_WITH_IMAGES=$(echo "$DIRS" | jq -c '
88-
map({
89-
dir: .,
90-
image: (
91-
if . == "models/amplify" then
92-
"svcbionemo023/bionemo-framework:amplify-model-devcontainer-082025"
93-
else
94-
"nvcr.io/nvidia/pytorch:25.06-py3"
95-
end
96-
)
97-
})
98-
')
99-
echo "dirs=$DIRS_WITH_IMAGES" >> $GITHUB_OUTPUT
92+
echo "dirs=$DIRS" >> $GITHUB_OUTPUT
10093
- name: Show output
10194
run: |
95+
echo "=== Changed Files Analysis ==="
96+
echo "Current branch: ${{ github.ref_name }}"
97+
echo "Merge-base commit: ${{ steps.merge-base.outputs.merge-base }}"
98+
echo "Changed files compared to merge-base:"
99+
echo '${{ steps.changed-files.outputs.all_changed_files }}' | jq -r '.[]' | sed 's/^/ - /'
100+
echo "Total changed files: $(echo '${{ steps.changed-files.outputs.all_changed_files }}' | jq '. | length')"
102101
echo '${{ toJSON(steps.changed-files.outputs) }}'
103102
echo '${{ toJSON(steps.set-dirs.outputs) }}'
104103
shell: bash
@@ -107,40 +106,31 @@ jobs:
107106
needs: changed-dirs
108107
runs-on: linux-amd64-gpu-l4-latest-1
109108
if: ${{ needs.changed-dirs.outputs.dirs != '[]' }}
110-
container:
111-
image: ${{ matrix.recipe.image }}
112-
strategy:
113-
matrix:
114-
recipe: ${{ fromJson(needs.changed-dirs.outputs.dirs) }}
115-
fail-fast: false
116109

117110
steps:
118111
- name: Setup proxy cache
119112
uses: nv-gha-runners/setup-proxy-cache@main
113+
120114
- name: Checkout repository
121115
uses: actions/checkout@v4
116+
117+
- name: Setup python
118+
uses: actions/setup-python@v5
122119
with:
123-
sparse-checkout: "${{ matrix.recipe.dir }}"
124-
sparse-checkout-cone-mode: false
120+
python-version: "3.12"
125121

126-
- name: Install dependencies
127-
working-directory: ${{ matrix.recipe.dir }}
128-
#
122+
- name: Install ci script dependencies
129123
run: |
130-
if [ -f pyproject.toml ] || [ -f setup.py ]; then
131-
PIP_CONSTRAINT= pip install -e .
132-
echo "Installed ${{ matrix.recipe.dir }} as editable package"
133-
elif [ -f requirements.txt ]; then
134-
PIP_CONSTRAINT= pip install -r requirements.txt
135-
echo "Installed ${{ matrix.recipe.dir }} from requirements.txt"
136-
else
137-
echo "No pyproject.toml, setup.py, or requirements.txt found in ${{ matrix.recipe.dir }}"
138-
exit 1
139-
fi
124+
python -m pip install --upgrade pip
125+
pip install platformdirs
140126
141127
- name: Run tests
142-
working-directory: ${{ matrix.recipe.dir }}
143-
run: pytest -v .
128+
env:
129+
DIRS_JSON: ${{ needs.changed-dirs.outputs.dirs }}
130+
run: |
131+
# Convert JSON array to space-separated arguments
132+
DIRS_ARGS=$(echo "$DIRS_JSON" | jq -r '.[]' | tr '\n' ' ')
133+
./ci/scripts/recipes_local_test.py $DIRS_ARGS
144134
145135
verify-recipe-tests:
146136
# This job checks the status of the unit-tests job and fails if that job failed or was cancelled.
Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,26 +23,39 @@
2323
from pathlib import Path
2424
from typing import List, Optional
2525

26+
from platformdirs import user_cache_dir
27+
28+
29+
PIP_CACHE_DIR = user_cache_dir(appname="bionemo-pip-cache", appauthor="nvidia")
2630

2731
logger = logging.getLogger(__name__)
2832
logger.setLevel(logging.DEBUG)
2933

3034
DOCKER_RUN_ARGS = [
3135
"--rm",
32-
"-it",
3336
"--gpus",
3437
"all",
3538
"--ipc=host",
3639
"--ulimit",
3740
"memlock=-1",
3841
"--ulimit",
3942
"stack=67108864",
43+
"-v",
44+
f"{PIP_CACHE_DIR}:/workspace/.cache/pip",
4045
]
4146

4247
CUSTOM_CONTAINERS = {
4348
"models/amplify": "svcbionemo023/bionemo-framework:amplify-model-devcontainer-082025",
4449
}
45-
DEFAULT_CONTAINER = "nvcr.io/nvidia/pytorch:25.06-py3"
50+
51+
# DEFAULT_CONTAINER = "nvcr.io/nvidia/pytorch:25.06-py3"
52+
53+
# This is a squashed version of the pytorch:25.06-py3 image, generated with
54+
# docker-squash nvcr.io/nvidia/pytorch:25.06-py3 -t svcbionemo023/bionemo-framework:pytorch25.06-py3-squashed
55+
# --output type=registry,compression=zstd,force-compression=true,oci-mediatypes=true,compression-level=15
56+
# and pushed to the dockerhub registry. Our github actions are able to cache image pulls from dockerhub but not nvcr, so
57+
# hopefully this cuts down slightly on CI time, at the expense of a slightly more indirect image location.
58+
DEFAULT_CONTAINER = "svcbionemo023/bionemo-framework:pytorch25.06-py3-squashed"
4659

4760

4861
def get_git_root() -> str:
@@ -89,24 +102,26 @@ def run_tests_in_docker(work_dir: str) -> bool:
89102
install_and_test_script = textwrap.dedent("""
90103
set -e # Exit on any error
91104
92-
echo "Checking for dependency files..."
105+
# Ensure image-embedded constraints do not leak into local recipe installs
106+
unset PIP_CONSTRAINT || true
93107
108+
echo "Checking for dependency files..."
94109
# Install dependencies based on available files
95110
if [ -f pyproject.toml ] || [ -f setup.py ]; then
96111
echo "Installing package in editable mode..."
97-
PIP_CONSTRAINT= pip install -e .
112+
PIP_CACHE_DIR=/workspace/.cache/pip pip install -e .
98113
echo "Installed package as editable package"
99114
elif [ -f requirements.txt ]; then
100115
echo "Installing from requirements.txt..."
101-
PIP_CONSTRAINT= pip install -r requirements.txt
116+
PIP_CACHE_DIR=/workspace/.cache/pip pip install -r requirements.txt
102117
echo "Installed from requirements.txt"
103118
else
104119
echo "No pyproject.toml, setup.py, or requirements.txt found"
105120
exit 1
106121
fi
107122
108123
echo "Running tests..."
109-
pytest -v .
124+
python -m pytest -v .
110125
""")
111126

112127
relative_path = Path(work_dir).relative_to(git_root).as_posix()
@@ -166,6 +181,8 @@ def main():
166181
if args.debug:
167182
logging.getLogger().setLevel(logging.DEBUG)
168183

184+
logger.info(f"Caching pip installations to: {PIP_CACHE_DIR}")
185+
169186
# Get directories to test
170187
test_dirs = get_test_directories(args.directories)
171188

0 commit comments

Comments
 (0)