Skip to content

Commit 4c5ac7d

Browse files
authored
Merge branch 'main' into farhadr/evo2_cleanup
2 parents 58706fe + ee42211 commit 4c5ac7d

6 files changed

Lines changed: 83 additions & 97 deletions

File tree

.github/workflows/bionemo-subpackage-ci.yml

Lines changed: 54 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,115 +1,99 @@
11
name: BioNeMo Sub-Package Workflow
22

33
on:
4-
# TODO(@cspades): Re-enable PR-based testing and publishing for debugging or when we have sufficient compute resources.
5-
# pull_request:
6-
# types: [opened, synchronize, reopened, ready_for_review]
4+
# To test or publish sub-packages, or to try out changes to this workflow, from a PR branch, manually dispatch this workflow on that branch here: https://github.com/NVIDIA/bionemo-framework/actions/workflows/bionemo-subpackage-ci.yml
75
workflow_dispatch:
86
inputs:
97
subpackages:
108
description: BioNeMo sub-packages (comma-separated) to test or publish.
119
required: true
1210
type: string
11+
test:
12+
description: Test the sub-packages before publishing to PyPI. Strongly recommended for production releases to PyPI. Can be disabled when staging sub-packages on Test PyPI or publishing circular dependencies to PyPI.
13+
required: false
14+
type: boolean
15+
default: true
1316
publish:
14-
description: Publish the built package to PyPI.
17+
description: Publish the built package to PyPI. If testing is specified, requires that all sub-package tests succeed based on dependencies published to Test PyPI or PyPI.
18+
required: false
19+
type: boolean
20+
default: false
21+
pypi:
22+
description: Publish to PyPI instead of Test PyPI.
1523
required: false
1624
type: boolean
1725
default: false
18-
19-
env:
20-
# TODO(@cspades): Skip pytest for packages that require GPU's or special installation recipes until resolved.
21-
SUBPACKAGE_SKIP_TEST: '["bionemo-moco","bionemo-noodles","bionemo-geometric","bionemo-webdatamodule","bionemo-size-aware-batching","bionemo-testing","bionemo-llm","bionemo-evo2","bionemo-esm2","bionemo-geneformer","bionemo-example_model","bionemo-fw"]'
22-
# TODO(@cspades): Skip publishing for packages that require special installation recipes or should not be published to PyPI.
23-
SUBPACKAGE_SKIP_PUBLISH: '["bionemo-noodles","bionemo-geometric","bionemo-testing","bionemo-example_model"]'
2426

2527
jobs:
2628
configure-workflow-packages:
2729
name: "[Configure Workflow Packages] Identify sub-packages for testing and publishing."
28-
# Sub-package CI override label in PR's.
29-
if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'SKIP_SUBPACKAGE_CI') }}
3030
runs-on: ubuntu-latest
31-
# Depending on the type of GitHub Event (Dispatch or PR), we stage different packages for testing and publishing in 'workflow_packages'.
3231
outputs:
33-
workflow_packages: ${{ steps.collect-staged-packages.outputs.workflow_packages }}
32+
workflow_packages: ${{ steps.parse-dispatch-packages.outputs.dispatch_packages }}
3433
steps:
35-
- uses: actions/checkout@v4
36-
with:
37-
fetch-depth: 0
38-
submodules: "recursive"
39-
- id: collect-package-deltas
40-
if: ${{ github.event_name == 'pull_request' }}
41-
name: Check which sub-packages have been updated in the PR.
42-
run: |
43-
# Get list of changed files between PR base and head.
44-
changed_files=$(git diff --name-only origin/${{ github.base_ref }})
45-
46-
# Find unique package directories that changed.
47-
changed_packages=$(echo "$changed_files" | grep "^sub-packages/" | cut -d'/' -f2 | sort -u | jq -R -s -c 'split("\n")[:-1]')
48-
49-
# Send the list of changed packages to the next job.
50-
echo "pr_packages=$changed_packages" >> "$GITHUB_OUTPUT"
51-
echo "[PR Mode] Sub-packages to stage: $changed_packages"
5234
- id: parse-dispatch-packages
5335
if: ${{ github.event_name == 'workflow_dispatch' }}
5436
name: Parse the sub-packages specified in the workflow dispatch.
5537
run: |
5638
# Send the parsed list of sub-packages to the next job.
5739
# Trim whitespace around each name and drop empty entries, so inputs such as "a, b," still map to real sub-package directories.
dispatch_packages=$(echo '${{ github.event.inputs.subpackages }}' | jq -R -c 'split(",") | map(sub("^\\s+"; "") | sub("\\s+$"; "")) | map(select(length > 0))')
5840
echo "dispatch_packages=$dispatch_packages" >> "$GITHUB_OUTPUT"
59-
echo "[Workflow Dispatch Mode] Sub-packages to stage: $dispatch_packages"
60-
- id: collect-staged-packages
61-
name: Collect the sub-packages to stage.
62-
env:
63-
PR_PACKAGES: ${{ steps.collect-package-deltas.outputs.pr_packages }}
64-
DISPATCH_PACKAGES: ${{ steps.parse-dispatch-packages.outputs.dispatch_packages }}
65-
run: |
66-
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
67-
echo "workflow_packages=$PR_PACKAGES" >> "$GITHUB_OUTPUT"
68-
elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
69-
echo "workflow_packages=$DISPATCH_PACKAGES" >> "$GITHUB_OUTPUT"
70-
else
71-
echo "workflow_packages=[]" >> "$GITHUB_OUTPUT"
72-
fi
41+
echo "[BioNeMo Sub-Package CI] Sub-packages to stage: $dispatch_packages"
7342
7443
install-and-test:
7544
needs: configure-workflow-packages
76-
outputs:
77-
test_packages: ${{ needs.configure-workflow-packages.outputs.workflow_packages }}
45+
# Check if the previous job has any staged packages to test and publish.
46+
if: ${{ needs.configure-workflow-packages.outputs.workflow_packages != '[]' }}
7847
strategy:
7948
matrix:
8049
package: ${{ fromJson(needs.configure-workflow-packages.outputs.workflow_packages) }}
8150
fail-fast: false # Prevent all matrix jobs from failing if one fails.
8251
name: "[${{ matrix.package }}] Install and test sub-package."
83-
runs-on: ubuntu-latest
52+
runs-on: linux-amd64-gpu-l4-latest-1
53+
container: # GPU jobs must run in a container. Use a fresh base container for package installation and testing.
54+
image: nvidia/cuda:12.8.1-base-ubuntu22.04
8455
steps:
56+
# Silently skip all steps if testing is disabled, which does not block building or publishing.
57+
- name: Install git and system dependencies.
58+
if: ${{ github.event.inputs.test == 'true' }}
59+
run: |
60+
apt-get update
61+
apt-get install -y git
62+
apt-get install -y lsb-release # No longer pre-installed in Ubuntu>=22.04.
63+
apt-get install -y build-essential # For installing C build tools, like GCC and make.
8564
- uses: actions/checkout@v4
65+
if: ${{ github.event.inputs.test == 'true' }}
8666
with:
8767
fetch-depth: 0
8868
submodules: "recursive"
8969
- uses: actions/setup-python@v5
70+
if: ${{ github.event.inputs.test == 'true' }}
9071
with:
9172
python-version: "3.12"
92-
cache: "pip"
9373
- id: install-dispatch-subpackage
74+
if: ${{ github.event.inputs.test == 'true' }}
9475
name: Install sub-package.
95-
if: ${{ !contains(fromJson(env.SUBPACKAGE_SKIP_TEST), matrix.package) }}
9676
run: |
77+
# Install sub-package and dependencies.
78+
pip install --upgrade pip setuptools
9779
pip install pytest
98-
pip install sub-packages/${{ matrix.package }}
80+
# Install required core & optional [test] dependencies.
81+
pip install --no-cache-dir 'sub-packages/${{ matrix.package }}[test]'
9982
- id: test-dispatch-subpackage
83+
if: ${{ github.event.inputs.test == 'true' }}
10084
name: Test sub-package.
101-
if: ${{ !contains(fromJson(env.SUBPACKAGE_SKIP_TEST), matrix.package) }}
10285
run: pytest -vv sub-packages/${{ matrix.package }}
10386

10487
build-pypi:
10588
# Build distributions for the sub-packages staged by the workflow dispatch.
10689
# Validate that each sub-package builds before publishing.
107-
needs: install-and-test
90+
needs: [configure-workflow-packages, install-and-test]
91+
if: ${{ needs.configure-workflow-packages.outputs.workflow_packages != '[]' && github.event.inputs.publish == 'true' }}
10892
outputs:
109-
staged_packages: ${{ needs.install-and-test.outputs.test_packages }}
93+
staged_packages: ${{ needs.configure-workflow-packages.outputs.workflow_packages }}
11094
strategy:
11195
matrix:
112-
package: ${{ fromJson(needs.install-and-test.outputs.test_packages) }}
96+
package: ${{ fromJson(needs.configure-workflow-packages.outputs.workflow_packages) }}
11397
fail-fast: false # Prevent all matrix jobs from failing if one fails.
11498
name: "[${{ matrix.package }}] Build the sub-package."
11599
runs-on: ubuntu-latest
@@ -120,48 +104,48 @@ jobs:
120104
- uses: actions/setup-python@v5
121105
with:
122106
python-version: "3.12"
123-
cache: "pip"
124107
- id: build-package
125108
name: Build a binary wheel and a source tarball for the sub-package.
126-
if: ${{ !contains(fromJson(env.SUBPACKAGE_SKIP_PUBLISH), matrix.package) }}
127109
run: |
110+
if [[ "${{ github.event.inputs.test }}" != "true" ]]; then
111+
# For untested sub-packages, append '-dev' to the version for PyPI.
112+
sed -i 's/[[:space:]]*$//' sub-packages/${{ matrix.package }}/VERSION
113+
sed -i 's/$/-dev/' sub-packages/${{ matrix.package }}/VERSION
114+
fi
128115
python -m pip install build
129116
python -m build sub-packages/${{ matrix.package }}
130117
- id: upload-distribution
131118
name: Upload distribution packages to the workflow.
132-
if: ${{ !contains(fromJson(env.SUBPACKAGE_SKIP_PUBLISH), matrix.package) }}
133119
uses: actions/upload-artifact@v4
134120
with:
135121
name: ${{ matrix.package }}-dist
136122
path: sub-packages/${{ matrix.package }}/dist
137123

138124
publish-to-pypi:
139-
needs: build-pypi
125+
needs: [build-pypi, install-and-test]
126+
# Require staged sub-package builds for publishing to PyPI.
127+
if: ${{ needs.build-pypi.result == 'success' }}
140128
strategy:
141129
matrix:
142130
package: ${{ fromJson(needs.build-pypi.outputs.staged_packages) }}
143131
fail-fast: false # Prevent all matrix jobs from failing if one fails.
144132
name: Publish ${{ matrix.package }} to PyPI.
145133
runs-on: ubuntu-latest
146134
environment:
147-
name: ${{ github.event_name == 'pull_request' && 'testpypi' || 'pypi' }}
148-
url: ${{ github.event_name == 'pull_request' && format('https://test.pypi.org/p/{0}', matrix.package) || format('https://pypi.org/p/{0}', matrix.package) }}
135+
# github.event.inputs.* values are strings; compare against 'true' because the non-empty string 'false' is truthy and would always select 'pypi'.
name: ${{ github.event.inputs.pypi == 'true' && 'pypi' || 'testpypi' }}
136+
url: ${{ github.event.inputs.pypi == 'true' && format('https://pypi.org/p/{0}', matrix.package) || format('https://test.pypi.org/p/{0}', matrix.package) }}
149137
permissions:
150138
id-token: write
151-
# Publish to Test PyPI for PRs, and publish to PyPI for (manually triggered) Workflow Dispatches.
152-
# Note: Test PyPI is designed to run on each commit to the main branch and is often used to indicate a healthy release publishing pipeline.
153139
steps:
154140
- id: download-distribution
155141
name: Download the built distribution.
156-
if: ${{ !contains(fromJson(env.SUBPACKAGE_SKIP_PUBLISH), matrix.package) }}
157142
uses: actions/download-artifact@v4
158143
with:
159144
name: ${{ matrix.package }}-dist
160145
path: sub-packages/${{ matrix.package }}/dist
161-
# TODO(@cspades): Uncomment when developing on this workflow to test. Otherwise, no need to publish for every PR.
162146
- id: publish-to-testpypi
163147
name: Publish distribution 📦 to Test PyPI for PR.
164-
if: ${{ github.event_name == 'pull_request' && !contains(fromJson(env.SUBPACKAGE_SKIP_PUBLISH), matrix.package) }}
148+
if: ${{ github.event.inputs.pypi == 'false' }}
165149
uses: pypa/gh-action-pypi-publish@release/v1
166150
with:
167151
verbose: true
@@ -170,7 +154,10 @@ jobs:
170154
skip-existing: true
171155
- id: publish-to-pypi
172156
name: Publish distribution 📦 to PyPI for Workflow Dispatch.
173-
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish == 'true' && !contains(fromJson(env.SUBPACKAGE_SKIP_PUBLISH), matrix.package) }}
157+
# To require testing before publishing to PyPI, add: ... && needs.install-and-test.result == 'success'
158+
# If testing is run but fails, the workflow will fail and not publish to PyPI (or Test PyPI).
159+
# We strongly recommend testing when publishing to production PyPI.
160+
if: ${{ github.event.inputs.pypi == 'true' }}
174161
uses: pypa/gh-action-pypi-publish@release/v1
175162
with:
176163
verbose: true

Dockerfile

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,6 @@ apt-get upgrade -qyy \
3535
rm -rf /tmp/* /var/tmp/*
3636
EOF
3737

38-
## BUMP TE from v1.14 to v1.13
39-
ARG TE_TAG=v1.13
40-
RUN NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi \
41-
pip --disable-pip-version-check --no-cache-dir install \
42-
git+https://github.com/NVIDIA/TransformerEngine.git@${TE_TAG}
43-
4438
# Use a branch of causal_conv1d while the repository works on Blackwell support.
4539
RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip --disable-pip-version-check --no-cache-dir install git+https://github.com/trvachov/causal-conv1d.git@52e06e3d5ca10af0c7eb94a520d768c48ef36f1f
4640

docs/docs/user-guide/contributing/contributing.md

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,6 @@ Key behaviors:
120120
- Skips entire CI pipeline
121121
- Use for documentation typos, README updates
122122

123-
#### **SKIP_SUBPACKAGE_CI**
124-
125-
- Skips installation, testing, and publication of individual sub-packages of BioNeMo.
126-
- For more granular controls on a per-package basis, such as skipping only testing or only publication to PyPI, you can modify hard-coded sub-package names (`bionemo-...`) listed within `SKIP...` variables in [`bionemo-framework/.github/workflows/bionemo-subpackage-ci.yml`](../../../../.github/workflows/bionemo-subpackage-ci.yml).
127-
- Sub-package CI is enabled (not skipped) by default. Utilized to test individual BioNeMo sub-packages without the environmental support of the BioNeMo Framework Container, which validates if the sub-package can be published standalone to PyPI.
128-
129123
#### **INCLUDE_NOTEBOOKS_TESTS**
130124

131125
- Enables notebook validation tests
@@ -259,7 +253,7 @@ The resulting altered baseline files should then be committed.
259253
- Unit tests not associated with source code in BioNeMo can be placed anywhere reasonable under `tests/bionemo/<package-name-suffix>`.
260254
- Verify that the `pyproject.toml` is `pip install`-able (and `python -m build`-able).
261255
- If the sub-package is publishable, follow the instructions in [Publishing to PyPI](#publishing-to-pypi) to register or link your package to the sub-package workflow in BioNeMo Framework.
262-
- If the sub-package cannot be installed, built, or published to PyPI, add the name of this sub-package to `SUBPACKAGE_SKIP_TEST` and `SUBPACKAGE_SKIP_PUBLISH` in [`bionemo-framework/.github/workflows/bionemo-subpackage-ci.yml`](../../../../.github/workflows/bionemo-subpackage-ci.yml).
256+
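- List test-only dependencies in a `test` field under `[project.optional-dependencies]` in `pyproject.toml`. The sub-package workflow installs them with `pip install 'sub-packages/<package-name>[test]'` before running `pytest`.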
263257

264258
### Publishing to PyPI
265259

@@ -276,27 +270,33 @@ To publish your sub-package via "Trusted Publishing" to PyPI, you can follow the
276270
- Environment Name:
277271
- `pypi` for PyPI
278272
- `testpypi` for Test PyPI
279-
- **NVIDIA-Only**: Run the workflow!
280-
- Create or update any PR with `git diff` changes to your `bionemo-framework/sub-packages`.
281-
- Publishes to Test PyPI.
282-
- Dispatch the `bionemo-subpackage-ci.yml` workflow from GitHub Actions.
283-
- Publishes to PyPI.
284-
- Required: Input a comma-separated list of sub-packages you want to test and/or publish into `subpackages`.
285-
- Optional: Set `publish` to `true` if you want to publish to PyPI. (Default: `false`)
273+
- **NVIDIA-Only**: Run the workflow! For more information, refer to: [Sub-Package GitHub Actions Workflow](#sub-package-github-actions-workflow)
286274
- **Optional**: Add `bionemo` as an owner or maintainer of the PyPI package if you want help maintaining it.
287275
- **Disclaimer**: If this is not done, and the package becomes dysfunctional, then NVIDIA / BioNeMo are not responsible for the health of the package or the sub-package source code, because we will not have the ability to deprecate versions, etc.
288276

289-
### Sub-Package CI Workflow Key Points
277+
### Sub-Package GitHub Actions Workflow
278+
279+
- Dispatch the `bionemo-subpackage-ci.yml` workflow from GitHub Actions to test, build, and publish your sub-packages to PyPI!
280+
- Required: Input a comma-separated list of sub-packages you want to test and/or publish into `subpackages`.
281+
- For example, `bionemo-moco,bionemo-llm,bionemo-webdatamodule`. The sub-packages will be tested and published in separate parallel environments.
282+
- Optional: Set `test` to `true` if you want to test your sub-package. (Default: `true`)
283+
- Sub-packages that require pre- or post- installation steps may require modification of the `install-and-test` job in [`bionemo-framework/.github/workflows/bionemo-subpackage-ci.yml`](../../../../.github/workflows/bionemo-subpackage-ci.yml).
284+
- Optional: Set `publish` to `true` if you want to publish to Test PyPI or PyPI. (Default: `false`)
285+
- Pre-Requisite: [BioNeMo Publishing to PyPI](#publishing-to-pypi)
286+
- Publishing requires package building, but does not require testing for flexibility of package management.
287+
- Optional: Publishes to Test PyPI by default. To publish to PyPI, check `Publish to PyPI instead of Test PyPI.` (the `pypi` input).
288+
289+
### FAQ
290290

291-
- Individually `pip install`, `pytest`, and `python -m build` every supported sub-package before publishing to PyPI.
292-
- Triggered by PR changes (which publish to Test PyPI) or manually-dispatched GitHub Actions Workflows.
293-
- TODO(@cspades): PR workflows are deactivated pending sufficient compute resources. To reactivate them in your PR, uncomment the PR trigger in [`bionemo-framework/.github/workflows/bionemo-subpackage-ci.yml`](../../../../.github/workflows/bionemo-subpackage-ci.yml).
294-
- `SUBPACKAGE_SKIP_TEST` and `SUBPACKAGE_SKIP_PUBLISH` in [`bionemo-framework/.github/workflows/bionemo-subpackage-ci.yml`](../../../../.github/workflows/bionemo-subpackage-ci.yml) control whether your sub-package is installed/tested or published to PyPI.
295-
- BioNeMo Pull Request Archive
296-
- Prototype: https://github.com/NVIDIA/bionemo-framework/pull/725
291+
- What do I do if I want to test and publish two updated sub-packages that depend on each other?
292+
- To deal with circular dependencies, publish one package to PyPI first, followed by testing and publishing the other. `pip` installs dependencies in topological order (dependencies before their dependents), and will resolve / break circular dependencies as long as dependency conflicts do not exist. (If dependency conflicts exist, resolve them!)
293+
- For example, if `A` depends on `B`, and `B` depends on `A`...
294+
- Publish `B` to PyPI without testing. Untested sub-packages will be published with the version suffix `*-dev`.
295+
- Set `A` to depend on the latest version (i.e. the `*-dev` version) of `B`.
296+
- Test and publish `A` to PyPI.
297+
- Test and publish `B` (which depends on the now-released `A`) to PyPI.
297298

298299
### TODO
299300

300301
- Support building packages that have installation dependencies, such as `bionemo-noodles` dependent on `maturin` or `bionemo-<model>` dependent on `transformer-engine`.
301-
- Support unit tests that require GPU.
302302
- Automatically cut a release tag for the sub-package via GHA.

sub-packages/bionemo-amplify/src/bionemo/amplify/train_amplify.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def main(
307307
save_top_k=save_top_k,
308308
every_n_train_steps=val_check_interval,
309309
always_save_context=True, # Enables the .nemo file-like checkpointing where all IOMixins are under SerDe
310-
filename="{epoch}-{val_loss:.2f}-{step}-{consumed_samples}", # Including step and consumed_samples in the checkpoint filename prevents duplicate filenames and bugs related to this.
310+
filename="{epoch}-{step}-{consumed_samples}", # Including step and consumed_samples in the checkpoint filename prevents duplicate filenames and bugs related to this.
311311
)
312312

313313
# Setup the logger and train the model

sub-packages/bionemo-esm2/src/bionemo/esm2/scripts/train_esm2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def main(
333333
every_n_train_steps=val_check_interval,
334334
always_save_context=True,
335335
# Enables the .nemo file-like checkpointing where all IOMixins are under SerDe
336-
filename="{epoch}-{val_loss:.2f}-{step}-{consumed_samples}",
336+
filename="{epoch}-{step}-{consumed_samples}",
337337
# Including step and consumed_samples in the checkpoint filename prevents duplicate filenames and bugs related to this.
338338
)
339339
callbacks.append(checkpoint_callback)

sub-packages/bionemo-llm/pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ dependencies = [
2121
# TODO: Figure out how (or whether) to include apex & TE as part of the dependencies.
2222
]
2323

24+
[project.optional-dependencies]
25+
test = [
26+
"bionemo-testing"
27+
]
28+
2429
[tool.setuptools.packages.find]
2530
where = ["src"]
2631
include = ["bionemo.*"]

0 commit comments

Comments
 (0)