Commit eeb2c60
committed
chore(beep boop 🤖): Bump (main, mcore-main) (2026-06-17)
1 parent 6e8c6bb commit eeb2c60
3 files changed
Lines changed: 59 additions & 138 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1 | | - | |
| 1 | + | |
Submodule Megatron-LM updated 50 files
- .github/copy-pr-bot.yaml+1-1
- .github/oncall_schedule.json+4-4
- AGENTS.md+1
- docs/user-guide/features/fine_grained_activation_offloading.md+2-2
- docs/user-guide/features/paged_stash.md+1-1
- megatron/core/distributed/fsdp/src/megatron_fsdp/experimental/__init__.py+20
- megatron/core/distributed/fsdp/src/megatron_fsdp/experimental/dbuffer.py+454
- megatron/core/distributed/fsdp/src/megatron_fsdp/experimental/layout.py+251
- megatron/core/distributed/fsdp/src/megatron_fsdp/experimental/placement.py+55
- megatron/core/extensions/transformer_engine.py+8
- megatron/core/inference/config.py+7-1
- megatron/core/inference/contexts/dynamic_context.py+88-11
- megatron/core/inference/contexts/mamba_slot_allocator.py+10-2
- megatron/core/inference/text_generation_controllers/text_generation_controller.py+12-12
- megatron/core/models/common/embeddings/rope_utils.py+29-24
- megatron/core/models/gpt/gpt_model.py+9-2
- megatron/core/models/hybrid/hybrid_model.py+13-1
- megatron/core/models/mimo/model/base.py+13
- megatron/core/models/vision/radio.py+3
- megatron/core/optimizer/distrib_optimizer.py+42
- megatron/core/optimizer/qk_clip.py+8
- megatron/core/pipeline_parallel/fine_grained_activation_offload.py+40-7
- megatron/core/transformer/experimental_attention_variant/absorbed_mla.py+155-185
- megatron/core/transformer/moe/README.md+1-1
- megatron/core/transformer/moe/experts.py+26-9
- megatron/core/transformer/multi_latent_attention.py+14-3
- megatron/core/transformer/transformer_config.py+31-5
- megatron/inference/utils.py+1-1
- megatron/rl/inference/megatron.py+4
- megatron/rl/rl_profiling.py+906
- megatron/training/arguments.py+11-1
- megatron/training/training.py+48-51
- pretrain_gpt.py+120-37
- pretrain_hybrid.py+114-38
- tests/unit_tests/distributed/megatron_fsdp/test_dbuffer.py+573
- tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_uneven_dtensor.py-21
- tests/unit_tests/fusions/test_mla_yarn_rope_apply.py+43-2
- tests/unit_tests/inference/contexts/test_dynamic_prefix_caching.py+33-2
- tests/unit_tests/inference/engines/test_dynamic_engine.py+12-12
- tests/unit_tests/inference/engines/test_hybrid_prefix_caching_e2e.py+7-2
- tests/unit_tests/inference/engines/test_prefix_caching_cuda_graphs.py+9-2
- tests/unit_tests/inference/test_mtp_cuda_graph_inference.py+289-3
- tests/unit_tests/inference/text_generation_controllers/test_text_generation_controller.py+7-5
- tests/unit_tests/models/mimo/test_mimo_zero_grad_buffer.py+33
- tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py+53
- tests/unit_tests/test_optimizer.py+64
- tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py+105-45
- tests/unit_tests/transformer/test_multi_latent_attention.py+3
- tools/trigger_internal_ci.md+2
- tools/trigger_internal_ci.py+52-4
0 commit comments