Skip to content

Commit 04fe905

Browse files
committed
Update Megatron dependencies for transformers v5 change.
1 parent fccec46 commit 04fe905

3 files changed

Lines changed: 732 additions & 272 deletions

File tree

pyproject.toml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,9 @@ megatron = [
4747
"transformer-engine==2.11.0",
4848
"transformer-engine-cu12==2.11.0",
4949
"transformer-engine-torch==2.11.0",
50-
"megatron-core==0.15.2",
51-
"megatron-bridge==0.2.0rc6",
50+
"megatron-core==0.16.0rc0",
51+
"pybind11>=2.13.6",
52+
"megatron-bridge",
5253
"nvidia-ml-py==13.580.82",
5354
"ml-dtypes>=0.5.0 ; python_full_version < '3.13'",
5455
]
@@ -130,13 +131,18 @@ required-version = ">=0.6.15"
130131
override-dependencies = [
131132
"transformer-engine==2.11.0",
132133
"numpy<2",
134+
"nvidia-resiliency-ext<0.5",
135+
"flashinfer-python==0.6.1",
136+
"transformers==5.2.0",
133137
"torch==2.10.0",
134138
"quack-kernels==0.2.5",
135139
]
136-
no-build-isolation-package = ["transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
140+
exclude-dependencies = ["pynvml", "emerging-optimizers"]
141+
no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
137142

138143
[tool.uv.extra-build-dependencies]
139144
apex = ["torch>=2.8.0"]
145+
megatron-core = ["pybind11"]
140146
transformer-engine-torch = ["torch>=2.8.0"]
141147

142148
[tool.uv.extra-build-variables]
@@ -233,4 +239,5 @@ dev = [
233239
[tool.uv.sources]
234240
panza = { git = "https://github.com/corbt/panza.git" }
235241
apex = { git = "https://github.com/NVIDIA/apex.git", branch = "25.09" }
242+
megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "75f2c5ad4afb702b57b4781a00f5291a66bcf183" }
236243
transformer-engine-torch = { git = "https://github.com/NVIDIA/TransformerEngine.git", tag = "v2.11", subdirectory = "transformer_engine/pytorch" }

src/art/megatron/provider.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,5 @@ def _flex_attention_layer_spec(
111111
provider.calculate_per_token_loss = True
112112
if provider.tensor_model_parallel_size > 1:
113113
provider.sequence_parallel = True
114+
provider.finalize()
114115
return provider

0 commit comments

Comments (0)