From 09efc754cbde68c713919823a0b9fc2a6fe62c01 Mon Sep 17 00:00:00 2001
From: ruit <ruit@nvidia.com>
Date: Wed, 8 Apr 2026 05:08:39 -0700
Subject: [PATCH 1/7] feat: enhance convert_lora_to_hf script to support
 exporting LoRA adapters in HuggingFace PEFT format

Signed-off-by: ruit <ruit@nvidia.com>
---
 examples/converters/convert_lora_to_hf.py | 110 ++++++++++++++++++----
 1 file changed, 93 insertions(+), 17 deletions(-)

diff --git a/examples/converters/convert_lora_to_hf.py b/examples/converters/convert_lora_to_hf.py
index b439f8b8e2..656118771a 100644
--- a/examples/converters/convert_lora_to_hf.py
+++ b/examples/converters/convert_lora_to_hf.py
@@ -1,10 +1,20 @@
-"""Merge a Megatron LoRA adapter checkpoint with its base model and export to HuggingFace format.
+"""Export a Megatron LoRA adapter checkpoint to HuggingFace format.
 
-This is helpful when one wants to train the model using Megatron with LoRA adapter and then convert it to HuggingFace format
-for inference and evaluation.
+This script supports two workflows:
+
+1. Merge the base model and LoRA adapter, then export a standard HuggingFace model.
+2. Export only the LoRA adapter to a HuggingFace PEFT-compatible directory without merging.
 
 Usage (requires mcore extra):
 
+    # Export adapter only (recommended when you want PEFT format)
+    uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
+        --adapter-only \
+        --adapter-ckpt results/dpo_glm5/step_5/policy/weights/iter_0000000 \
+        --hf-model-name zai-org/GLM-5 \
+        --hf-ckpt-path ./hf_lora_adapter
+
+    # Merge base model + adapter and export a full HF checkpoint
     uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
         --base-ckpt ~/.cache/huggingface/nemo_rl/zai-org/GLM-5/iter_0000000 \
         --adapter-ckpt results/dpo_glm5/step_5/policy/weights/iter_0000000 \
@@ -29,13 +39,13 @@
 
 def parse_args():
     parser = argparse.ArgumentParser(
-        description="Merge Megatron LoRA adapter with base model and export to HF"
+        description="Export Megatron LoRA checkpoint to HuggingFace format"
     )
     parser.add_argument(
         "--base-ckpt",
         type=str,
-        required=True,
-        help="Path to base model Megatron checkpoint (iter_XXXXXXX directory)",
+        default=None,
+        help="Path to base model Megatron checkpoint (iter_XXXXXXX directory). Required unless --adapter-only is set.",
     )
     parser.add_argument(
         "--adapter-ckpt",
@@ -53,9 +63,47 @@ def parse_args():
         "--hf-ckpt-path",
         type=str,
         required=True,
-        help="Output path for merged HF checkpoint",
+        help="Output path for the exported HF checkpoint or adapter directory",
     )
-    return parser.parse_args()
+    parser.add_argument(
+        "--adapter-only",
+        action="store_true",
+        help="Export only the LoRA adapter in HuggingFace PEFT format without merging into the base model.",
+    )
+    args = parser.parse_args()
+    if not args.adapter_only and not args.base_ckpt:
+        parser.error("--base-ckpt is required unless --adapter-only is set")
+    return args
+
+
+def export_lora_adapter_to_hf(
+    adapter_ckpt: str,
+    hf_model_name: str,
+    hf_ckpt_path: str,
+) -> str:
+    """Export a Megatron LoRA checkpoint to HuggingFace PEFT adapter format.
+
+    Args:
+        adapter_ckpt: Path to the LoRA adapter Megatron checkpoint (iter_XXXXXXX directory).
+        hf_model_name: HuggingFace model identifier for the base model.
+        hf_ckpt_path: Output directory for the HuggingFace PEFT adapter files.
+
+    Returns:
+        The *hf_ckpt_path* that was written to.
+
+    Raises:
+        FileExistsError: If *hf_ckpt_path* already exists.
+    """
+    if os.path.exists(hf_ckpt_path):
+        raise FileExistsError(f"Output path already exists: {hf_ckpt_path}")
+
+    from megatron.bridge import AutoBridge
+
+    bridge = AutoBridge.from_hf_pretrained(hf_model_name, trust_remote_code=True)
+    logger.info("Exporting LoRA adapter in HuggingFace PEFT format...")
+    bridge.export_adapter_ckpt(adapter_ckpt, hf_ckpt_path)
+    logger.info(f"Done! HF adapter saved to: {hf_ckpt_path}")
+    return hf_ckpt_path
 
 
 def merge_lora_to_hf(
@@ -86,13 +134,16 @@ def merge_lora_to_hf(
     from megatron.bridge import AutoBridge
     from megatron.bridge.peft.lora import LoRA
     from megatron.bridge.training.checkpointing import (
+        _generate_model_state_dict,
         _load_model_weights_from_checkpoint,
+        apply_peft_adapter_filter_to_state_dict,
     )
     from megatron.bridge.training.model_load_save import (
         load_model_config,
         megatron_cpu_init_context,
         temporary_distributed_context,
     )
+    from megatron.core import dist_checkpointing
 
     bridge = AutoBridge.from_hf_pretrained(hf_model_name, trust_remote_code=True)
 
@@ -140,9 +191,10 @@ def merge_lora_to_hf(
             lora_B_init_method=peft_section.get("lora_B_init_method", "zero"),
             a2a_experimental=peft_section.get("a2a_experimental", False),
         )
-        model_cfg.peft = peft
 
-        logger.info("Building model with LoRA wrappers on CPU...")
+        logger.info(
+            "Building base model on CPU (LoRA wrappers applied after base weights are loaded)..."
+        )
         if hasattr(model_cfg, "finalize"):
             model_cfg.finalize()
         with megatron_cpu_init_context(model_cfg):
@@ -159,8 +211,25 @@ def merge_lora_to_hf(
         _load_model_weights_from_checkpoint(base_ckpt, megatron_model, strict=False)
         gc.collect()
 
+        logger.info("Applying LoRA wrappers to model...")
+        megatron_model = peft(megatron_model, training=False)
+        gc.collect()
+
         logger.info(f"Loading LoRA adapter from {adapter_ckpt}...")
-        _load_model_weights_from_checkpoint(adapter_ckpt, megatron_model, strict=False)
+        adapter_sharded_state_dict = _generate_model_state_dict(megatron_model, {})
+        adapter_sharded_state_dict = apply_peft_adapter_filter_to_state_dict(
+            adapter_sharded_state_dict, peft
+        )
+        loaded_adapter_state_dict = dist_checkpointing.load(
+            adapter_sharded_state_dict, adapter_ckpt
+        )
+        model_key = (
+            "model"
+            if "model" in loaded_adapter_state_dict
+            else next(k for k in loaded_adapter_state_dict if k.startswith("model"))
+        )
+        for m in megatron_model:
+            m.load_state_dict(loaded_adapter_state_dict[model_key], strict=False)
         gc.collect()
 
         logger.info("Saving merged model in HuggingFace format...")
@@ -183,12 +252,19 @@ def merge_lora_to_hf(
 
 def main():
     args = parse_args()
-    merge_lora_to_hf(
-        base_ckpt=args.base_ckpt,
-        adapter_ckpt=args.adapter_ckpt,
-        hf_model_name=args.hf_model_name,
-        hf_ckpt_path=args.hf_ckpt_path,
-    )
+    if args.adapter_only:
+        export_lora_adapter_to_hf(
+            adapter_ckpt=args.adapter_ckpt,
+            hf_model_name=args.hf_model_name,
+            hf_ckpt_path=args.hf_ckpt_path,
+        )
+    else:
+        merge_lora_to_hf(
+            base_ckpt=args.base_ckpt,
+            adapter_ckpt=args.adapter_ckpt,
+            hf_model_name=args.hf_model_name,
+            hf_ckpt_path=args.hf_ckpt_path,
+        )
 
 
 if __name__ == "__main__":

From 63bd058c226526e1a620b83c40ebfb7b42e1ab9b Mon Sep 17 00:00:00 2001
From: ruit <ruit@nvidia.com>
Date: Thu, 9 Apr 2026 02:09:27 -0700
Subject: [PATCH 2/7] add functional test and fix doc

Signed-off-by: ruit <ruit@nvidia.com>
---
 docs/design-docs/checkpointing.md            |  45 ++++---
 docs/guides/sft.md                           |   7 +-
 tests/functional/test_converter_roundtrip.py | 117 ++++++++++++++++---
 3 files changed, 134 insertions(+), 35 deletions(-)

diff --git a/docs/design-docs/checkpointing.md b/docs/design-docs/checkpointing.md
index 5cf49387e1..dde3ae65cf 100644
--- a/docs/design-docs/checkpointing.md
+++ b/docs/design-docs/checkpointing.md
@@ -37,11 +37,20 @@ uv run --extra mcore examples/converters/convert_megatron_to_hf.py \
   --hf-ckpt-path=<path_to_save_hf_ckpt>
 ```
 
-## Merging Megatron LoRA Adapter Checkpoints to Hugging Face Format
+## Converting Megatron LoRA Adapter Checkpoints to Hugging Face Format
 
-When training with [LoRA (Low-Rank Adaptation)](../guides/sft.md#lora-configuration) on the Megatron backend, the resulting checkpoint contains only the adapter weights alongside the base model configuration. To produce a standalone Hugging Face checkpoint suitable for inference or evaluation, use the LoRA merger script. It loads the base model, applies the LoRA adapter weights on top, and saves the merged result in Hugging Face format.
+When training with [LoRA (Low-Rank Adaptation)](../guides/sft.md#lora-configuration) on the Megatron backend, the resulting checkpoint contains only the adapter weights alongside the base model configuration. The `convert_lora_to_hf.py` script supports two export modes:
 
-This script requires Megatron-Core, so make sure to launch with the `mcore` extra:
+- **Merged**: fold the LoRA adapter into the base model and export a single standalone HuggingFace checkpoint.
+- **Adapter-only**: export only the LoRA adapter weights in [HuggingFace PEFT](https://huggingface.co/docs/peft) format, keeping the base model separate.
+
+This script requires Megatron-Core, so make sure to launch with the `mcore` extra.
+
+### Option A — Merged checkpoint
+
+Loads the base model, applies the LoRA adapter weights on top, and saves the merged result in HuggingFace format. The output can be used directly with `AutoModelForCausalLM.from_pretrained` or passed to the [evaluation pipeline](../guides/eval.md).
+
+**Example:**
 
 ```sh
 uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
@@ -51,24 +60,26 @@ uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
     --hf-ckpt-path <output_path_for_merged_hf_model>
 ```
 
-### Arguments
+### Option B — Adapter-only (PEFT format)
 
-| Argument | Description |
-|---|---|
-| `--base-ckpt` | Path to the base model's Megatron checkpoint directory (the `iter_XXXXXXX` folder). |
-| `--adapter-ckpt` | Path to the LoRA adapter's Megatron checkpoint directory (must contain a `run_config.yaml` with a `peft` section). |
-| `--hf-model-name` | HuggingFace model identifier used to resolve the model architecture and tokenizer (e.g. `Qwen/Qwen2.5-7B`). |
-| `--hf-ckpt-path` | Output directory for the merged HuggingFace checkpoint. Must not already exist. |
+Exports only the LoRA adapter weights in HuggingFace PEFT format without merging into the base model. This is useful when you want to serve the base model and adapter separately (e.g. with vLLM's LoRA support).
 
-### Example
+**Example:**
 
 ```sh
-# Merge a LoRA adapter trained on Qwen2.5-7B back into a full HF checkpoint
 uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
-    --base-ckpt ~/.cache/huggingface/nemo_rl/Qwen/Qwen2.5-7B/iter_0000000 \
-    --adapter-ckpt results/sft_lora/step_100/policy/weights/iter_0000000 \
-    --hf-model-name Qwen/Qwen2.5-7B \
-    --hf-ckpt-path results/sft_lora/merged_hf
+    --adapter-only \
+    --adapter-ckpt <path_to_lora_adapter_checkpoint>/iter_0000000 \
+    --hf-model-name <huggingface_model_name> \
+    --hf-ckpt-path <output_path_for_hf_adapter>
 ```
 
-The merged checkpoint can then be used directly with `AutoModelForCausalLM.from_pretrained` or passed to the [evaluation pipeline](../guides/eval.md).
+### Arguments
+
+| Argument | Description |
+|---|---|
+| `--base-ckpt` | Path to the base model's Megatron checkpoint directory (the `iter_XXXXXXX` folder). Required unless `--adapter-only` is set. |
+| `--adapter-ckpt` | Path to the LoRA adapter's Megatron checkpoint directory (must contain a `run_config.yaml` with a `peft` section). |
+| `--hf-model-name` | HuggingFace model identifier used to resolve the model architecture and tokenizer (e.g. `Qwen/Qwen2.5-7B`). |
+| `--hf-ckpt-path` | Output directory for the exported HuggingFace checkpoint or adapter. Must not already exist. |
+| `--adapter-only` | Export only the LoRA adapter in HuggingFace PEFT format without merging into the base model. |
diff --git a/docs/guides/sft.md b/docs/guides/sft.md
index 35db6bab47..30166f40fc 100644
--- a/docs/guides/sft.md
+++ b/docs/guides/sft.md
@@ -339,7 +339,12 @@ For more details on LoRA, see [LoRA: Low-Rank Adaptation of Large Language Model
 
 ### Exporting a LoRA Checkpoint to Hugging Face Format
 
-After training with LoRA on the Megatron backend, use the LoRA merger script to fold the adapter weights into the base model and produce a standalone Hugging Face checkpoint for inference or evaluation. See the [Checkpointing documentation](../design-docs/checkpointing.md#merging-megatron-lora-adapter-checkpoints-to-hugging-face-format) for full usage details.
+After training with LoRA on the Megatron backend, the `convert_lora_to_hf.py` script supports two export modes:
+
+- **Merged**: fold the adapter into the base model and export a single standalone HuggingFace checkpoint for inference or evaluation.
+- **Adapter-only**: export only the adapter weights in HuggingFace PEFT format, keeping the base model separate (e.g. for use with vLLM's LoRA support).
+
+See the [Checkpointing documentation](../design-docs/checkpointing.md#converting-megatron-lora-adapter-checkpoints-to-hugging-face-format) for full usage details and examples.
 
 ## Optimizations
 
diff --git a/tests/functional/test_converter_roundtrip.py b/tests/functional/test_converter_roundtrip.py
index 9488d40c41..66eb52c2b7 100644
--- a/tests/functional/test_converter_roundtrip.py
+++ b/tests/functional/test_converter_roundtrip.py
@@ -54,6 +54,7 @@
 _convert_lora_mod = importlib.util.module_from_spec(_spec)
 _spec.loader.exec_module(_convert_lora_mod)
 merge_lora_to_hf = _convert_lora_mod.merge_lora_to_hf
+export_lora_adapter_to_hf = _convert_lora_mod.export_lora_adapter_to_hf
 
 
 def create_test_config() -> Dict[str, Any]:
@@ -389,7 +390,6 @@ def create_megatron_lora_checkpoint(
         model_cfg.fp8_param = False
 
         peft = LoRA(**peft_cfg)
-        model_cfg.peft = peft
         if hasattr(model_cfg, "finalize"):
             model_cfg.finalize()
         with megatron_cpu_init_context(model_cfg):
@@ -402,22 +402,41 @@ def create_megatron_lora_checkpoint(
         for m in megatron_model:
             m.requires_grad_(False)
 
-        # Apply a small deterministic perturbation to LoRA weights so the
-        # merge produces something different from the base.
+        # Save the base model first to create the checkpoint directory structure
+        # and write run_config.yaml (which contains the "model" key needed by
+        # load_model_config). Adapter weights are saved separately below.
+        adapter_dir = os.path.join(temp_dir, "lora_adapter_checkpoint")
+        save_megatron_model(megatron_model, adapter_dir)
+        iter_dir = os.path.join(adapter_dir, "iter_0000000")
+
+        # Apply LoRA wrappers (same pattern as merge_lora_to_hf) and perturb
+        # adapter weights so that the merge produces something different from base.
+        megatron_model = peft(megatron_model, training=False)
+        gc.collect()
+
         torch.manual_seed(42)
         for m in megatron_model:
             for name, param in m.named_parameters():
                 if "lora_" in name or "adapter" in name:
                     param.data.normal_(0, 0.01)
 
-        adapter_dir = os.path.join(temp_dir, "lora_adapter_checkpoint")
-        save_megatron_model(megatron_model, adapter_dir)
+        # Save only the adapter weights using dist_checkpointing, which is the
+        # format that merge_lora_to_hf expects to load from adapter_ckpt.
+        from megatron.bridge.training.checkpointing import (
+            _generate_model_state_dict,
+            apply_peft_adapter_filter_to_state_dict,
+        )
+        from megatron.core import dist_checkpointing
 
-        # save_megatron_model already writes a run_config.yaml with the
-        # "model" key.  Merge the peft section into it so that both
+        adapter_sharded_sd = _generate_model_state_dict(megatron_model, {})
+        adapter_sharded_sd = apply_peft_adapter_filter_to_state_dict(
+            adapter_sharded_sd, peft
+        )
+        dist_checkpointing.save(adapter_sharded_sd, iter_dir)
+
+        # Merge the peft section into run_config.yaml so that both
         # load_model_config (needs "model") and the LoRA converter
         # (needs "peft") can find what they expect.
-        iter_dir = os.path.join(adapter_dir, "iter_0000000")
         run_config_path = os.path.join(iter_dir, "run_config.yaml")
         with open(run_config_path) as f:
             run_config = yaml.safe_load(f)
@@ -518,6 +537,17 @@ def main():
             hf_ckpt_path=lora_merged_hf_path,
         )
 
+        # Step 7d: Export LoRA adapter only in HuggingFace PEFT format
+        print("\n" + "=" * 60)
+        print("STEP 7d: Exporting LoRA adapter only (PEFT format)")
+        print("=" * 60)
+        lora_adapter_hf_path = os.path.join(temp_dir, "lora_adapter_hf")
+        export_lora_adapter_to_hf(
+            adapter_ckpt=lora_adapter_path,
+            hf_model_name=model_name,
+            hf_ckpt_path=lora_adapter_hf_path,
+        )
+
         # Step 8: Load converted models and compare
         print("\n" + "=" * 60)
         print("STEP 8: Loading converted models and comparing")
@@ -585,11 +615,11 @@ def main():
         )
         lora_merged_state_dict = get_model_state_dict(lora_merged_model)
 
-        lora_keys = set(lora_merged_state_dict.keys())
-        assert lora_keys == set(original_state_dict.keys()), (
+        lora_merged_keys = set(lora_merged_state_dict.keys())
+        assert lora_merged_keys == set(original_state_dict.keys()), (
             f"LoRA merged model key mismatch.\n"
-            f"  Extra: {lora_keys - set(original_state_dict.keys())}\n"
-            f"  Missing: {set(original_state_dict.keys()) - lora_keys}"
+            f"  Extra: {lora_merged_keys - set(original_state_dict.keys())}\n"
+            f"  Missing: {set(original_state_dict.keys()) - lora_merged_keys}"
         )
         print("✓ LoRA merged model has the expected key structure")
 
@@ -598,9 +628,9 @@ def main():
         any_different = False
         for key in original_state_dict:
             v_orig = original_state_dict[key]
-            v_lora = lora_merged_state_dict[key]
+            v_lora_merged = lora_merged_state_dict[key]
             if isinstance(v_orig, torch.Tensor) and not torch.allclose(
-                v_orig, v_lora, rtol=1e-5, atol=1e-5
+                v_orig, v_lora_merged, rtol=1e-5, atol=1e-5
             ):
                 any_different = True
                 break
@@ -615,8 +645,59 @@ def main():
         with torch.no_grad():
             lora_output = lora_merged_model(test_input_lora)
         print("✓ LoRA merged model can perform forward pass")
+        # del lora_merged_model
+        gc.collect()
 
-        del lora_merged_model
+        # Adapter-only (PEFT) export assertions
+        print("Verifying adapter-only PEFT export...")
+        adapter_config_path = os.path.join(lora_adapter_hf_path, "adapter_config.json")
+        assert os.path.exists(adapter_config_path), (
+            f"adapter_config.json not found in {lora_adapter_hf_path}"
+        )
+        weight_candidates = ["adapter_model.safetensors", "adapter_model.bin"]
+        weight_file_found = any(
+            os.path.exists(os.path.join(lora_adapter_hf_path, f))
+            for f in weight_candidates
+        )
+        assert weight_file_found, (
+            f"No adapter weight file found in {lora_adapter_hf_path}. "
+            f"Expected one of: {weight_candidates}"
+        )
+        print(
+            "✓ PEFT adapter directory has expected files (adapter_config.json + weights)"
+        )
+
+        # Forward pass using the already-merged model from Step 7c.
+        test_input_peft = torch.randint(0, 1000, (1, 10))
+        with torch.no_grad():
+            lora_merged_model(test_input_peft)
+        print("✓ LoRA merged model can perform a forward pass")
+
+        # Verify the adapter-only export produces the same merged weights as Step 7c
+        # by calling merge_lora_to_hf again with the same Megatron adapter. This
+        # avoids tied-weight complications from PeftModel.merge_and_unload().
+        adapter_only_merged_hf_path = os.path.join(temp_dir, "adapter_only_merged_hf")
+        merge_lora_to_hf(
+            base_ckpt=megatron_checkpoint_path,
+            adapter_ckpt=lora_adapter_path,
+            hf_model_name=model_name,
+            hf_ckpt_path=adapter_only_merged_hf_path,
+        )
+        adapter_only_merged_model = AutoModelForCausalLM.from_pretrained(
+            adapter_only_merged_hf_path,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+        )
+        adapter_only_merged_state_dict = get_model_state_dict(adapter_only_merged_model)
+        assert_state_dicts_equal(
+            adapter_only_merged_state_dict,
+            lora_merged_state_dict,
+            "adapter-only export + merge_lora_to_hf (Step 7d)",
+            "lora merged (Step 7c)",
+        )
+        print("✓ adapter-only merge via merge_lora_to_hf matches Step 7c")
+
+        del adapter_only_merged_model, lora_merged_model
         gc.collect()
 
         # Verify that both converted models have the expected structure
@@ -647,11 +728,13 @@ def main():
             megatron_output = megatron_converted_model(test_input)
 
         print(
-            "✓ Dtensor V1 and Dtensor V2 DCP, Megatron, and LoRA-merged models can perform forward passes"
+            "✓ Dtensor V1 and Dtensor V2 DCP, Megatron, and LoRA models can perform forward passes"
         )
 
         print("\n" + "=" * 80)
-        print("✓ ALL TESTS PASSED (DCP v1, DCP v2, Megatron, LoRA merge)!")
+        print(
+            "✓ ALL TESTS PASSED (DCP v1, DCP v2, Megatron, LoRA merge, LoRA adapter-only PEFT)!"
+        )
         print("=" * 80)
 
 

From c2e65cf7a7f2bf2c273ec0faeec7c61804d076c8 Mon Sep 17 00:00:00 2001
From: ruit <ruit@nvidia.com>
Date: Thu, 9 Apr 2026 02:17:25 -0700
Subject: [PATCH 3/7] add copyright

Signed-off-by: ruit <ruit@nvidia.com>
---
 examples/converters/convert_lora_to_hf.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/examples/converters/convert_lora_to_hf.py b/examples/converters/convert_lora_to_hf.py
index 656118771a..9fdb93427b 100644
--- a/examples/converters/convert_lora_to_hf.py
+++ b/examples/converters/convert_lora_to_hf.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
 """Export a Megatron LoRA adapter checkpoint to HuggingFace format.
 
 This script supports two workflows:

From f2430f72e5b0ae0b575d4c730d8445f842dc537d Mon Sep 17 00:00:00 2001
From: ruit <ruit@nvidia.com>
Date: Thu, 9 Apr 2026 02:37:57 -0700
Subject: [PATCH 4/7] test: restore del lora_merged_model and drop redundant forward pass

Signed-off-by: ruit <ruit@nvidia.com>
---
 tests/functional/test_converter_roundtrip.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tests/functional/test_converter_roundtrip.py b/tests/functional/test_converter_roundtrip.py
index 66eb52c2b7..7148a1fa45 100644
--- a/tests/functional/test_converter_roundtrip.py
+++ b/tests/functional/test_converter_roundtrip.py
@@ -645,7 +645,7 @@ def main():
         with torch.no_grad():
             lora_output = lora_merged_model(test_input_lora)
         print("✓ LoRA merged model can perform forward pass")
-        # del lora_merged_model
+        del lora_merged_model
         gc.collect()
 
         # Adapter-only (PEFT) export assertions
@@ -667,12 +667,6 @@ def main():
             "✓ PEFT adapter directory has expected files (adapter_config.json + weights)"
         )
 
-        # Forward pass using the already-merged model from Step 7c.
-        test_input_peft = torch.randint(0, 1000, (1, 10))
-        with torch.no_grad():
-            lora_merged_model(test_input_peft)
-        print("✓ LoRA merged model can perform a forward pass")
-
         # Verify the adapter-only export produces the same merged weights as Step 7c
         # by calling merge_lora_to_hf again with the same Megatron adapter. This
         # avoids tied-weight complications from PeftModel.merge_and_unload().
@@ -697,7 +691,7 @@ def main():
         )
         print("✓ adapter-only merge via merge_lora_to_hf matches Step 7c")
 
-        del adapter_only_merged_model, lora_merged_model
+        del adapter_only_merged_model
         gc.collect()
 
         # Verify that both converted models have the expected structure
@@ -728,7 +722,7 @@ def main():
             megatron_output = megatron_converted_model(test_input)
 
         print(
-            "✓ Dtensor V1 and Dtensor V2 DCP, Megatron, and LoRA models can perform forward passes"
+            "✓ Dtensor V1 and Dtensor V2 DCP, Megatron, and LoRA merged models can perform forward passes"
         )
 
         print("\n" + "=" * 80)

From a513dcfdafd8cc2c4160b42aac0743446af4837d Mon Sep 17 00:00:00 2001
From: ruit <ruit@nvidia.com>
Date: Sun, 12 Apr 2026 19:51:47 -0700
Subject: [PATCH 5/7] refactor: share LoRA model build between merged and
 adapter-only export; require --base-ckpt

Signed-off-by: ruit <ruit@nvidia.com>
---
 examples/converters/convert_lora_to_hf.py    | 139 ++++++++++---------
 tests/functional/test_converter_roundtrip.py |   1 +
 2 files changed, 78 insertions(+), 62 deletions(-)

diff --git a/examples/converters/convert_lora_to_hf.py b/examples/converters/convert_lora_to_hf.py
index 9fdb93427b..b52402f0e8 100644
--- a/examples/converters/convert_lora_to_hf.py
+++ b/examples/converters/convert_lora_to_hf.py
@@ -24,6 +24,7 @@
 
     # Export adapter only (recommended when you want PEFT format)
     uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
+        --base-ckpt ~/.cache/huggingface/nemo_rl/zai-org/GLM-5/iter_0000000 \
         --adapter-only \
         --adapter-ckpt results/dpo_glm5/step_5/policy/weights/iter_0000000 \
         --hf-model-name zai-org/GLM-5 \
@@ -43,6 +44,7 @@
 import logging
 import os
 import sys
+from contextlib import contextmanager
 
 import yaml
 
@@ -59,8 +61,8 @@ def parse_args():
     parser.add_argument(
         "--base-ckpt",
         type=str,
-        default=None,
-        help="Path to base model Megatron checkpoint (iter_XXXXXXX directory). Required unless --adapter-only is set.",
+        required=True,
+        help="Path to base model Megatron checkpoint (iter_XXXXXXX directory). Required for both merged and adapter-only export.",
     )
     parser.add_argument(
         "--adapter-ckpt",
@@ -86,66 +88,16 @@ def parse_args():
         help="Export only the LoRA adapter in HuggingFace PEFT format without merging into the base model.",
     )
     args = parser.parse_args()
-    if not args.adapter_only and not args.base_ckpt:
-        parser.error("--base-ckpt is required unless --adapter-only is set")
     return args
 
 
-def export_lora_adapter_to_hf(
-    adapter_ckpt: str,
-    hf_model_name: str,
-    hf_ckpt_path: str,
-) -> str:
-    """Export a Megatron LoRA checkpoint to HuggingFace PEFT adapter format.
-
-    Args:
-        adapter_ckpt: Path to the LoRA adapter Megatron checkpoint (iter_XXXXXXX directory).
-        hf_model_name: HuggingFace model identifier for the base model.
-        hf_ckpt_path: Output directory for the HuggingFace PEFT adapter files.
-
-    Returns:
-        The *hf_ckpt_path* that was written to.
-
-    Raises:
-        FileExistsError: If *hf_ckpt_path* already exists.
-    """
-    if os.path.exists(hf_ckpt_path):
-        raise FileExistsError(f"Output path already exists: {hf_ckpt_path}")
-
-    from megatron.bridge import AutoBridge
-
-    bridge = AutoBridge.from_hf_pretrained(hf_model_name, trust_remote_code=True)
-    logger.info("Exporting LoRA adapter in HuggingFace PEFT format...")
-    bridge.export_adapter_ckpt(adapter_ckpt, hf_ckpt_path)
-    logger.info(f"Done! HF adapter saved to: {hf_ckpt_path}")
-    return hf_ckpt_path
-
-
-def merge_lora_to_hf(
+@contextmanager
+def _build_megatron_model_with_lora(
     base_ckpt: str,
     adapter_ckpt: str,
     hf_model_name: str,
-    hf_ckpt_path: str,
-) -> str:
-    """Merge a Megatron LoRA adapter with its base model and export to HuggingFace format.
-
-    Args:
-        base_ckpt: Path to the base model Megatron checkpoint (iter_XXXXXXX directory).
-        adapter_ckpt: Path to the LoRA adapter Megatron checkpoint (iter_XXXXXXX directory).
-                      Must contain a ``run_config.yaml`` with a ``peft`` section.
-        hf_model_name: HuggingFace model identifier (e.g. ``zai-org/GLM-5``).
-        hf_ckpt_path: Output directory for the merged HuggingFace checkpoint.
-
-    Returns:
-        The *hf_ckpt_path* that was written to.
-
-    Raises:
-        FileExistsError: If *hf_ckpt_path* already exists.
-        ValueError: If the adapter's ``run_config.yaml`` has no ``peft`` section.
-    """
-    if os.path.exists(hf_ckpt_path):
-        raise FileExistsError(f"Output path already exists: {hf_ckpt_path}")
-
+):
+    """Build a single-rank Megatron model with LoRA weights loaded for export flows."""
     from megatron.bridge import AutoBridge
     from megatron.bridge.peft.lora import LoRA
     from megatron.bridge.training.checkpointing import (
@@ -194,6 +146,7 @@ def merge_lora_to_hf(
         model_cfg.hierarchical_context_parallel_sizes = None
         model_cfg.fp8 = None
         model_cfg.fp8_param = False
+        model_cfg.gradient_accumulation_fusion = False
 
         peft = LoRA(
             target_modules=peft_section.get("target_modules", []),
@@ -247,6 +200,46 @@ def merge_lora_to_hf(
             m.load_state_dict(loaded_adapter_state_dict[model_key], strict=False)
         gc.collect()
 
+        try:
+            yield bridge, megatron_model, peft
+        finally:
+            del megatron_model
+            gc.collect()
+            logger.info("Freed model memory.")
+            sys.stderr.flush()
+            sys.stdout.flush()
+
+
+def merge_lora_to_hf(
+    base_ckpt: str,
+    adapter_ckpt: str,
+    hf_model_name: str,
+    hf_ckpt_path: str,
+) -> str:
+    """Merge a Megatron LoRA adapter with its base model and export to HuggingFace format.
+
+    Args:
+        base_ckpt: Path to the base model Megatron checkpoint (iter_XXXXXXX directory).
+        adapter_ckpt: Path to the LoRA adapter Megatron checkpoint (iter_XXXXXXX directory).
+                      Must contain a ``run_config.yaml`` with a ``peft`` section.
+        hf_model_name: HuggingFace model identifier (e.g. ``zai-org/GLM-5``).
+        hf_ckpt_path: Output directory for the merged HuggingFace checkpoint.
+
+    Returns:
+        The *hf_ckpt_path* that was written to.
+
+    Raises:
+        FileExistsError: If *hf_ckpt_path* already exists.
+        ValueError: If the adapter's ``run_config.yaml`` has no ``peft`` section.
+    """
+    if os.path.exists(hf_ckpt_path):
+        raise FileExistsError(f"Output path already exists: {hf_ckpt_path}")
+
+    with _build_megatron_model_with_lora(
+        base_ckpt=base_ckpt,
+        adapter_ckpt=adapter_ckpt,
+        hf_model_name=hf_model_name,
+    ) as (bridge, megatron_model, _):
         logger.info("Saving merged model in HuggingFace format...")
         bridge.save_hf_pretrained(
             megatron_model,
@@ -255,20 +248,42 @@ def merge_lora_to_hf(
             merge_adapter_weights=True,
         )
 
-        del megatron_model
-        gc.collect()
-        logger.info("Freed model memory.")
-        sys.stderr.flush()
-        sys.stdout.flush()
-
     logger.info(f"Done! Merged HF model saved to: {hf_ckpt_path}")
     return hf_ckpt_path
 
 
+def export_lora_adapter_to_hf(
+    base_ckpt: str,
+    adapter_ckpt: str,
+    hf_model_name: str,
+    hf_ckpt_path: str,
+) -> str:
+    """Export only the LoRA adapter in HuggingFace PEFT format without using AutoBridge.export_adapter_ckpt."""
+    if os.path.exists(hf_ckpt_path):
+        raise FileExistsError(f"Output path already exists: {hf_ckpt_path}")
+
+    with _build_megatron_model_with_lora(
+        base_ckpt=base_ckpt,
+        adapter_ckpt=adapter_ckpt,
+        hf_model_name=hf_model_name,
+    ) as (bridge, megatron_model, peft):
+        logger.info("Saving LoRA adapter in HuggingFace PEFT format...")
+        bridge.save_hf_adapter(
+            megatron_model,
+            hf_ckpt_path,
+            peft_config=peft,
+            base_model_name_or_path=hf_model_name,
+        )
+
+    logger.info(f"Done! HF adapter saved to: {hf_ckpt_path}")
+    return hf_ckpt_path
+
+
 def main():
     args = parse_args()
     if args.adapter_only:
         export_lora_adapter_to_hf(
+            base_ckpt=args.base_ckpt,
             adapter_ckpt=args.adapter_ckpt,
             hf_model_name=args.hf_model_name,
             hf_ckpt_path=args.hf_ckpt_path,
diff --git a/tests/functional/test_converter_roundtrip.py b/tests/functional/test_converter_roundtrip.py
index 7148a1fa45..6d9009de5a 100644
--- a/tests/functional/test_converter_roundtrip.py
+++ b/tests/functional/test_converter_roundtrip.py
@@ -543,6 +543,7 @@ def main():
         print("=" * 60)
         lora_adapter_hf_path = os.path.join(temp_dir, "lora_adapter_hf")
         export_lora_adapter_to_hf(
+            base_ckpt=megatron_checkpoint_path,
             adapter_ckpt=lora_adapter_path,
             hf_model_name=model_name,
             hf_ckpt_path=lora_adapter_hf_path,

From 129cdf587fd5f631380c6ee630c62c2cf2aeb476 Mon Sep 17 00:00:00 2001
From: ruit <ruit@nvidia.com>
Date: Sun, 12 Apr 2026 20:38:26 -0700
Subject: [PATCH 6/7] docs: document that --base-ckpt is required for adapter-only LoRA export

Signed-off-by: ruit <ruit@nvidia.com>
---
 docs/design-docs/checkpointing.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/design-docs/checkpointing.md b/docs/design-docs/checkpointing.md
index dde3ae65cf..b76757a22c 100644
--- a/docs/design-docs/checkpointing.md
+++ b/docs/design-docs/checkpointing.md
@@ -64,10 +64,13 @@ uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
 
 Exports only the LoRA adapter weights in HuggingFace PEFT format without merging into the base model. This is useful when you want to serve the base model and adapter separately (e.g. with vLLM's LoRA support).
 
+Although the output is adapter-only, the converter still needs `--base-ckpt` to reconstruct the Megatron model, apply the LoRA modules, and load the adapter weights before exporting them to PEFT format.
+
 **Example:**
 
 ```sh
 uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
+    --base-ckpt <path_to_base_megatron_checkpoint>/iter_0000000 \
     --adapter-only \
     --adapter-ckpt <path_to_lora_adapter_checkpoint>/iter_0000000 \
     --hf-model-name <huggingface_model_name> \
@@ -78,7 +81,7 @@ uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
 
 | Argument | Description |
 |---|---|
-| `--base-ckpt` | Path to the base model's Megatron checkpoint directory (the `iter_XXXXXXX` folder). Required unless `--adapter-only` is set. |
+| `--base-ckpt` | Path to the base model's Megatron checkpoint directory (the `iter_XXXXXXX` folder). Required for both merged and adapter-only export. |
 | `--adapter-ckpt` | Path to the LoRA adapter's Megatron checkpoint directory (must contain a `run_config.yaml` with a `peft` section). |
 | `--hf-model-name` | HuggingFace model identifier used to resolve the model architecture and tokenizer (e.g. `Qwen/Qwen2.5-7B`). |
 | `--hf-ckpt-path` | Output directory for the exported HuggingFace checkpoint or adapter. Must not already exist. |

From 2db3e7d3f31c9f8a418e30f4c7a14ff1f016832c Mon Sep 17 00:00:00 2001
From: ruit <ruit@nvidia.com>
Date: Tue, 21 Apr 2026 00:08:30 -0700
Subject: [PATCH 7/7] fix: correct link formatting in nsys-profiling.md and
 expand export_lora_adapter_to_hf docstring

Signed-off-by: ruit <ruit@nvidia.com>
---
 docs/nsys-profiling.md                    |  2 +-
 examples/converters/convert_lora_to_hf.py | 17 ++++++++++++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/docs/nsys-profiling.md b/docs/nsys-profiling.md
index dfbf085786..2428501616 100644
--- a/docs/nsys-profiling.md
+++ b/docs/nsys-profiling.md
@@ -6,7 +6,7 @@ NeMo RL supports Nsight profiling for Ray workers through environment variable p
 
 ## Prerequisites
 
-* Install NVIDIA Nsight Systems (`nsys`) on the compute nodes where workers will run. For Ubuntu installation instructions, see the [NVIDIA Nsight Systems Installation Guide](https://docs.nvidia.com/nsight-systems/InstallationGuide/index.html#package-manager-installation)).
+* Install NVIDIA Nsight Systems (`nsys`) on the compute nodes where workers will run. For Ubuntu installation instructions, see the [NVIDIA Nsight Systems Installation Guide](https://docs.nvidia.com/nsight-systems/InstallationGuide/index.html#package-manager-installation).
 
 **Note: If you're using NeMo RL containers, `nsys` is already installed.**
 
diff --git a/examples/converters/convert_lora_to_hf.py b/examples/converters/convert_lora_to_hf.py
index b52402f0e8..bc2744750d 100644
--- a/examples/converters/convert_lora_to_hf.py
+++ b/examples/converters/convert_lora_to_hf.py
@@ -258,7 +258,22 @@ def export_lora_adapter_to_hf(
     hf_model_name: str,
     hf_ckpt_path: str,
 ) -> str:
-    """Export only the LoRA adapter in HuggingFace PEFT format without using AutoBridge.export_adapter_ckpt."""
+    """Export a Megatron LoRA adapter to HuggingFace PEFT format.
+
+    Args:
+        base_ckpt: Path to the base model Megatron checkpoint (iter_XXXXXXX directory).
+        adapter_ckpt: Path to the LoRA adapter Megatron checkpoint (iter_XXXXXXX directory).
+                      Must contain a ``run_config.yaml`` with a ``peft`` section.
+        hf_model_name: HuggingFace model identifier (e.g. ``zai-org/GLM-5``).
+        hf_ckpt_path: Output directory for the HuggingFace PEFT adapter checkpoint.
+
+    Returns:
+        The *hf_ckpt_path* that was written to.
+
+    Raises:
+        FileExistsError: If *hf_ckpt_path* already exists.
+        ValueError: If the adapter's ``run_config.yaml`` has no ``peft`` section.
+    """
     if os.path.exists(hf_ckpt_path):
         raise FileExistsError(f"Output path already exists: {hf_ckpt_path}")