Skip to content

Commit f2cd6a1

Browse files
committed
Bump megatron bridge dep
Signed-off-by: John St. John <jstjohn@nvidia.com>
1 parent 121c57e commit f2cd6a1

3 files changed

Lines changed: 12 additions & 6 deletions

File tree

bionemo-recipes/recipes/evo2_megatron/.ci_build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
rm -f /usr/local/lib/python*/dist-packages/transformer_engine-*.dist-info/direct_url.json
66
export UV_LOCK_TIMEOUT=900 # raise the uv lock timeout to 15 minutes (900 seconds); adjust as needed
77
export UV_LINK_MODE=copy
8-
uv venv --system-site-packages
8+
uv venv --clear --system-site-packages
99

1010
# 2. Activate the environment
1111
source .venv/bin/activate
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
poetry-core
2+
poetry_dynamic_versioning # build dep of nvidia-resiliency-ext (transitively pulled by megatron-bridge); needed in the venv because we install with --no-build-isolation
3+
grpcio-tools # build dep of nvidia-resiliency-ext: its setup.py shells out to `python -m grpc_tools.protoc` to compile *.proto files, and because we install with --no-build-isolation it must already be present in the venv
24
wheel_stub
35
ninja # should speed up causal-conv1d build

bionemo-recipes/recipes/evo2_megatron/pyproject.toml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies = [
2424
"causal_conv1d",
2525
"nv-grouped-gemm",
2626
"megatron-core",
27-
"nvidia-resiliency-ext",
27+
# nvidia-resiliency-ext is pulled transitively by megatron-bridge.
2828
"emerging_optimizers",
2929
"subquadratic-ops-torch-cu13",
3030

@@ -88,6 +88,9 @@ override-dependencies = [
8888
"triton; sys_platform == 'never'",
8989
"transformer-engine; sys_platform == 'never'",
9090
"transformer-engine[pytorch]; sys_platform == 'never'",
91+
# Avoid optional log-pattern-mining dependency conflicts from nvidia-resiliency-ext.
92+
"logsage; sys_platform == 'never'",
93+
"drain3; sys_platform == 'never'",
9194
]
9295

9396
[tool.uv.sources]
@@ -100,11 +103,12 @@ nv-grouped-gemm = { git = "https://github.com/fanshiqing/grouped_gemm", tag = "v
100103
# Internal dependencies
101104
bionemo-recipeutils = { git = "https://github.com/NVIDIA/bionemo-framework.git", branch = "main", subdirectory = "sub-packages/bionemo-recipeutils" }
102105
bionemo-core = { git = "https://github.com/NVIDIA/bionemo-framework.git", branch = "main", subdirectory = "sub-packages/bionemo-core" }
103-
nvidia-resiliency-ext = { git = "https://github.com/NVIDIA/nvidia-resiliency-ext.git", rev = "54f85fe422d296cf04ea524130014bd3a2c3add1" }
106+
# nvidia-resiliency-ext is intentionally left to Megatron-Bridge so the transitive pin stays consistent.
104107

105-
# Megatron Bundle. This points to a version that still supports the deprecated no_weight_decay_cond field until the API for an alternative has been finalized.
106-
megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "549e3cb970c170b1d7a86d021261efe05e8a5d9f" }
107-
megatron-core = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "549e3cb970c170b1d7a86d021261efe05e8a5d9f", subdirectory = "3rdparty/Megatron-LM" }
108+
# Megatron Bundle. MCore is sourced from the same Megatron-Bridge release tag.
109+
megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", tag = "v0.4.1" }
110+
megatron-core = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", tag = "v0.4.1", subdirectory = "3rdparty/Megatron-LM" }
108111

109112
[tool.uv.extra-build-dependencies]
110113
warp-lang = ["wheel_stub"]
114+
nvidia-resiliency-ext = ["poetry_dynamic_versioning"]

0 commit comments

Comments
 (0)