
Commit 2332d20

feat: Add YaRN rope scaling support on Megatron-Bridge (#2188)

Signed-off-by: ruit <ruit@nvidia.com>

1 parent 5085f14 commit 2332d20

25 files changed

Lines changed: 424 additions & 14 deletions

docs/design-docs/training-backends.md

Lines changed: 13 additions & 1 deletion
````diff
@@ -75,4 +75,16 @@ export HF_HOME="/shared/nfs/huggingface"
 - **Mount in checkpoint directory**: If you are using Docker, make sure the Megatron checkpoint path is covered by `-v`/`--mount`. Similarly, if you are using SLURM+pyxis, ensure `--container-mounts` includes this path.
 - **Use shared storage**: Ensure the checkpoint directory is accessible from all nodes (e.g., NFS, shared filesystem).
 - **Prefer HF_HOME**: If you already have `HF_HOME` mounted across nodes, this reduces the number of environment variables to manage.
-- **Sufficient space**: Ensure adequate disk space for the converted model checkpoints.
+- **Sufficient space**: Ensure adequate disk space for the converted model checkpoints.
+
+### Force Reconvert
+
+By default, NeMo RL skips the HF → Megatron conversion if a converted checkpoint already exists at the target path. If you need to force a fresh conversion (e.g., after updating megatron-bridge or changing `hf_config_overrides`), set the following option in your config:
+
+```yaml
+policy:
+  megatron_cfg:
+    force_reconvert_from_hf: True # Default: False
+```
+
+This is equivalent to deleting the converted checkpoint directory and rerunning: the old checkpoint is overwritten with a freshly converted one.
````
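As a minimal sketch of how this option interacts with the rest of this commit: forcing reconversion matters mainly when `hf_config_overrides` changes (e.g., the YaRN settings added below), since a previously converted checkpoint no longer matches the overridden HF config. The keys are taken from this commit; combining them this way is illustrative, not prescribed by the diff:

```yaml
policy:
  megatron_cfg:
    force_reconvert_from_hf: True  # rerun HF -> Megatron conversion even if a converted checkpoint exists
  hf_config_overrides:             # changing RoPE settings invalidates the old converted checkpoint
    rope_scaling:
      rope_type: yarn
```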

examples/configs/distillation_math.yaml

Lines changed: 2 additions & 0 deletions
```diff
@@ -87,6 +87,7 @@ policy: &POLICY_BASE
 
   megatron_cfg: &MEGATRON_BASE
     enabled: false
+    force_reconvert_from_hf: False # Set to True to force reconversion of the model from Hugging Face
     empty_unused_memory_level: 0
     activation_checkpointing: false
     converter_type: "Qwen3ForCausalLM"
@@ -196,6 +197,7 @@ policy: &POLICY_BASE
       num_last_layers_in_bf16: 0
       num_first_layers_in_bf16: 0
       distributed_executor_backend: null
+      vllm_kwargs: {}
 
     colocated:
       # true: generation shares training GPUs
```
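The new `vllm_kwargs: {}` key defaults to an empty mapping. Assuming it is forwarded verbatim to the vLLM engine constructor (the diff only adds the empty default, so the forwarding behavior is an assumption), it could carry standard vLLM engine arguments, for example:

```yaml
# Hypothetical usage; max_num_seqs and enforce_eager are real vLLM engine
# arguments, but passing them through this key is illustrative only.
vllm_kwargs:
  max_num_seqs: 64
  enforce_eager: true
```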

examples/configs/distillation_math_megatron.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -145,6 +145,7 @@ policy: &POLICY_BASE
       num_last_layers_in_bf16: 0
       num_first_layers_in_bf16: 0
       distributed_executor_backend: null
+      vllm_kwargs: {}
 
     colocated:
       # true: generation shares training GPUs
```

examples/configs/dpo.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -121,6 +121,7 @@ policy:
     enabled: false
     use_linear_ce_fusion_loss: false
     linear_ce_fusion_chunk_size: 256
+    force_reconvert_from_hf: False # Set to True to force reconversion of the model from Hugging Face
     empty_unused_memory_level: 1
     activation_checkpointing: false
     tensor_model_parallel_size: 2
```

examples/configs/grpo_math_1B.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -128,6 +128,7 @@ policy:
 
   megatron_cfg:
     enabled: false
+    force_reconvert_from_hf: False # Set to True to force reconversion of the model from Hugging Face
     empty_unused_memory_level: 1 # 1 is the minimum recommendation for RL since we almost always need to offload before beginning generation. Setting to 0 is faster, but you are more likely to run out of GPU memory.
     activation_checkpointing: false
     converter_type: "Qwen2ForCausalLM"
```

examples/configs/grpo_math_1B_megatron.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -78,6 +78,7 @@ policy:
 
   megatron_cfg:
     enabled: true
+    force_reconvert_from_hf: False # Set to True to force reconversion of the model from Hugging Face
     empty_unused_memory_level: 1 # 1 is the minimum recommendation for RL since we almost always need to offload before beginning generation. Setting to 0 is faster, but you are more likely to run out of GPU memory.
     activation_checkpointing: false
     converter_type: "Qwen2ForCausalLM"
```
Lines changed: 32 additions & 0 deletions
```diff
@@ -0,0 +1,32 @@
+defaults: ../../grpo_math_1B_megatron.yaml
+grpo:
+  max_num_steps: 100
+  num_prompts_per_step: 2
+checkpointing:
+  checkpoint_dir: results/grpo-qwen2.5-1.5B-4n8g-megatron-yarn-256k
+  save_period: 20
+policy:
+  train_global_batch_size: 32
+  train_micro_batch_size: 1
+  max_total_sequence_length: 262144
+  make_sequence_length_divisible_by: 64
+  megatron_cfg:
+    context_parallel_size: 32
+  hf_config_overrides:
+    rope_scaling:
+      rope_type: yarn
+      rope_theta: 1000000
+      factor: ${div:${policy.max_total_sequence_length},${policy.hf_config_overrides.rope_scaling.original_max_position_embeddings}}
+      original_max_position_embeddings: 131072
+      truncate: true
+      beta_fast: 32
+      beta_slow: 1
+      mscale: 1
+      mscale_all_dim: 0
+logger:
+  wandb:
+    project: yarn
+    name: grpo-qwen2.5-1.5B-4n8g-megatron-yarn-256k
+cluster:
+  gpus_per_node: 8
+  num_nodes: 4
```
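For reference, the `${div:...}` interpolation above (assuming `div` is an arithmetic-division resolver registered by NeMo RL) evaluates to `262144 / 131072 = 2.0`, so the computed entry is equivalent to writing:

```yaml
rope_scaling:
  rope_type: yarn
  factor: 2.0  # 262144 / 131072: scales the 128K native context window to 256K
```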
Lines changed: 58 additions & 0 deletions
```diff
@@ -0,0 +1,58 @@
+defaults: ../../sft.yaml
+sft:
+  max_num_steps: 100
+checkpointing:
+  checkpoint_dir: results/sft-qwen3-0.6B-1n8g-megatron-yarn-64k
+  save_period: 20
+policy:
+  model_name: Qwen/Qwen3-0.6B
+  train_global_batch_size: 16
+  max_total_sequence_length: 65536
+  dtensor_cfg:
+    enabled: false
+  megatron_cfg:
+    enabled: true
+    context_parallel_size: 8
+    distributed_data_parallel_config:
+      grad_reduce_in_fp32: true
+    optimizer:
+      lr: 2.0e-05
+      min_lr: 2.0e-05
+      weight_decay: 0.01
+      adam_eps: 1.0e-08
+      clip_grad: 0
+      params_dtype: bfloat16
+      use_precision_aware_optimizer: false
+    scheduler:
+      lr_warmup_iters: 1
+      lr_warmup_init: 1.999999e-05
+  sequence_packing:
+    enabled: true
+  make_sequence_length_divisible_by: 16
+  optimizer: null
+  hf_config_overrides:
+    rope_scaling:
+      rope_type: yarn
+      rope_theta: 1000000
+      factor: 1.6
+      original_max_position_embeddings: 40960
+      truncate: true
+      beta_fast: 32
+      beta_slow: 1
+      mscale: 1
+      mscale_all_dim: 0
+data:
+  add_generation_prompt: true
+  train:
+    dataset_name: Nemotron-Cascade-2-SFT-Math
+    split_validation_size: 0.05
+    max_samples: 100000
+  validation: null
+logger:
+  wandb:
+    project: yarn
+    name: sft-qwen3-0.6B-1n8g-megatron-yarn-64k
+  tensorboard:
+    log_dir: tb_logs-sft-qwen3-0.6B-1n8g-megatron-yarn-64k
+cluster:
+  gpus_per_node: 8
```
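Here the scaling factor is hard-coded rather than interpolated, but it follows the same ratio of target context to native context:

```yaml
# factor = max_total_sequence_length / original_max_position_embeddings
#        = 65536 / 40960
#        = 1.6   (64K target context over Qwen3-0.6B's 40960 native positions)
factor: 1.6
```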

examples/configs/rm.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -77,6 +77,7 @@ policy:
   ## ignored since enabled=false, but needed for testing purposes
   megatron_cfg:
     enabled: false
+    force_reconvert_from_hf: False # Set to True to force reconversion of the model from Hugging Face
     empty_unused_memory_level: 1
     activation_checkpointing: false
     tensor_model_parallel_size: 2
```

examples/configs/sft.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -101,6 +101,7 @@ policy:
     enabled: false
     use_linear_ce_fusion_loss: false
     linear_ce_fusion_chunk_size: 256
+    force_reconvert_from_hf: False # Set to True to force reconversion of the model from Hugging Face
     env_vars: {}
     empty_unused_memory_level: 1
     activation_checkpointing: false
```
