Skip to content

Commit bc0f235

Browse files
committed
Expose fully_parallel_save and validate_access_integrity in save_megatron_model
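Based on container image commit dd9729f (v0.5.0.nemotron_3_super). Add fully_parallel_save and validate_access_integrity parameters (both default to True) to AutoBridge.save_megatron_model() and model_load_save.save_megatron_model(). AutoBridge forwards both by keyword; model_load_save forwards fully_parallel_save as-is and maps validate_access_integrity to ckpt_assume_constant_structure=not validate_access_integrity. Needed by NVIDIA-NeMo/RL#2226.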
1 parent: dd9729f; commit: bc0f235

2 files changed

Lines changed: 8 additions & 0 deletions

File tree

src/megatron/bridge/models/conversion/auto_bridge.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -590,6 +590,8 @@ def save_megatron_model(
590590
hf_tokenizer_path: Optional[str | Path] = None,
591591
low_memory_save: bool = False,
592592
hf_tokenizer_kwargs: Optional[dict] = None,
593+
fully_parallel_save: bool = True,
594+
validate_access_integrity: bool = True,
593595
) -> None:
594596
"""
595597
Save a Megatron model in native Megatron checkpoint format without optimizer
@@ -648,6 +650,8 @@ def save_megatron_model(
648650
hf_tokenizer_path=hf_tokenizer_path,
649651
low_memory_save=low_memory_save,
650652
hf_tokenizer_kwargs=hf_tokenizer_kwargs,
653+
fully_parallel_save=fully_parallel_save,
654+
validate_access_integrity=validate_access_integrity,
651655
)
652656

653657
def load_megatron_model(

src/megatron/bridge/training/model_load_save.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -413,6 +413,8 @@ def save_megatron_model(
413413
hf_tokenizer_path: Optional[Union[str, Path]] = None,
414414
low_memory_save: bool = False,
415415
hf_tokenizer_kwargs: Optional[dict] = None,
416+
fully_parallel_save: bool = True,
417+
validate_access_integrity: bool = True,
416418
) -> None:
417419
"""Save a Megatron model in native Megatron checkpoint format without optimizer state.
418420
@@ -505,6 +507,8 @@ def save_megatron_model(
505507
save_rng=False,
506508
ckpt_format=ckpt_format,
507509
dist_ckpt_optim_fully_reshardable=True,
510+
fully_parallel_save=fully_parallel_save,
511+
ckpt_assume_constant_structure=not validate_access_integrity,
508512
),
509513
dist=None,
510514
)

0 commit comments

Comments
 (0)