
Commit 0642c7a

add functional test and fix doc
Signed-off-by: ruit <ruit@nvidia.com>
1 parent c6e5495 commit 0642c7a

3 files changed

Lines changed: 134 additions & 35 deletions


docs/design-docs/checkpointing.md

Lines changed: 28 additions & 17 deletions
@@ -37,11 +37,20 @@ uv run --extra mcore examples/converters/convert_megatron_to_hf.py \
     --hf-ckpt-path=<path_to_save_hf_ckpt>
 ```

-## Merging Megatron LoRA Adapter Checkpoints to Hugging Face Format
+## Converting Megatron LoRA Adapter Checkpoints to Hugging Face Format

-When training with [LoRA (Low-Rank Adaptation)](../guides/sft.md#lora-configuration) on the Megatron backend, the resulting checkpoint contains only the adapter weights alongside the base model configuration. To produce a standalone Hugging Face checkpoint suitable for inference or evaluation, use the LoRA merger script. It loads the base model, applies the LoRA adapter weights on top, and saves the merged result in Hugging Face format.
+When training with [LoRA (Low-Rank Adaptation)](../guides/sft.md#lora-configuration) on the Megatron backend, the resulting checkpoint contains only the adapter weights alongside the base model configuration. The `convert_lora_to_hf.py` script supports two export modes:

-This script requires Megatron-Core, so make sure to launch with the `mcore` extra:
+- **Merged**: fold the LoRA adapter into the base model and export a single standalone Hugging Face checkpoint.
+- **Adapter-only**: export only the LoRA adapter weights in [Hugging Face PEFT](https://huggingface.co/docs/peft) format, keeping the base model separate.
+
+This script requires Megatron-Core, so make sure to launch with the `mcore` extra.
+
+### Option A — Merged checkpoint
+
+Loads the base model, applies the LoRA adapter weights on top, and saves the merged result in Hugging Face format. The output can be used directly with `AutoModelForCausalLM.from_pretrained` or passed to the [evaluation pipeline](../guides/eval.md).
+
+**Example:**

 ```sh
 uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
@@ -51,24 +60,26 @@ uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
     --hf-ckpt-path <output_path_for_merged_hf_model>
 ```

-### Arguments
+### Option B — Adapter-only (PEFT format)

-| Argument | Description |
-|---|---|
-| `--base-ckpt` | Path to the base model's Megatron checkpoint directory (the `iter_XXXXXXX` folder). |
-| `--adapter-ckpt` | Path to the LoRA adapter's Megatron checkpoint directory (must contain a `run_config.yaml` with a `peft` section). |
-| `--hf-model-name` | HuggingFace model identifier used to resolve the model architecture and tokenizer (e.g. `Qwen/Qwen2.5-7B`). |
-| `--hf-ckpt-path` | Output directory for the merged HuggingFace checkpoint. Must not already exist. |
+Exports only the LoRA adapter weights in Hugging Face PEFT format without merging them into the base model. This is useful when you want to serve the base model and adapter separately (e.g. with vLLM's LoRA support).

-### Example
+**Example:**

 ```sh
-# Merge a LoRA adapter trained on Qwen2.5-7B back into a full HF checkpoint
 uv run --extra mcore python examples/converters/convert_lora_to_hf.py \
-    --base-ckpt ~/.cache/huggingface/nemo_rl/Qwen/Qwen2.5-7B/iter_0000000 \
-    --adapter-ckpt results/sft_lora/step_100/policy/weights/iter_0000000 \
-    --hf-model-name Qwen/Qwen2.5-7B \
-    --hf-ckpt-path results/sft_lora/merged_hf
+    --adapter-only \
+    --adapter-ckpt <path_to_lora_adapter_checkpoint>/iter_0000000 \
+    --hf-model-name <huggingface_model_name> \
+    --hf-ckpt-path <output_path_for_hf_adapter>
 ```

-The merged checkpoint can then be used directly with `AutoModelForCausalLM.from_pretrained` or passed to the [evaluation pipeline](../guides/eval.md).
+### Arguments
+
+| Argument | Description |
+|---|---|
+| `--base-ckpt` | Path to the base model's Megatron checkpoint directory (the `iter_XXXXXXX` folder). Required unless `--adapter-only` is set. |
+| `--adapter-ckpt` | Path to the LoRA adapter's Megatron checkpoint directory (must contain a `run_config.yaml` with a `peft` section). |
+| `--hf-model-name` | Hugging Face model identifier used to resolve the model architecture and tokenizer (e.g. `Qwen/Qwen2.5-7B`). |
+| `--hf-ckpt-path` | Output directory for the exported Hugging Face checkpoint or adapter. Must not already exist. |
+| `--adapter-only` | Export only the LoRA adapter in Hugging Face PEFT format without merging it into the base model. |
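
A minimal downstream-loading sketch for the two modes, assuming `transformers` and `peft` are installed and that the merged export includes tokenizer files; the bracketed paths are the placeholders from the examples above:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Option A: the merged export is a self-contained Hugging Face checkpoint.
merged = AutoModelForCausalLM.from_pretrained("<output_path_for_merged_hf_model>")
tokenizer = AutoTokenizer.from_pretrained("<output_path_for_merged_hf_model>")

# Option B: the adapter-only export is loaded on top of the original base model.
base = AutoModelForCausalLM.from_pretrained("<huggingface_model_name>")
adapted = PeftModel.from_pretrained(base, "<output_path_for_hf_adapter>")
```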

docs/guides/sft.md

Lines changed: 6 additions & 1 deletion
@@ -339,7 +339,12 @@ For more details on LoRA, see [LoRA: Low-Rank Adaptation of Large Language Model

 ### Exporting a LoRA Checkpoint to Hugging Face Format

-After training with LoRA on the Megatron backend, use the LoRA merger script to fold the adapter weights into the base model and produce a standalone Hugging Face checkpoint for inference or evaluation. See the [Checkpointing documentation](../design-docs/checkpointing.md#merging-megatron-lora-adapter-checkpoints-to-hugging-face-format) for full usage details.
+After training with LoRA on the Megatron backend, the `convert_lora_to_hf.py` script supports two export modes:
+
+- **Merged**: fold the adapter into the base model and export a single standalone Hugging Face checkpoint for inference or evaluation.
+- **Adapter-only**: export only the adapter weights in Hugging Face PEFT format, keeping the base model separate (e.g. for use with vLLM's LoRA support, as sketched below).
+
+See the [Checkpointing documentation](../design-docs/checkpointing.md#converting-megatron-lora-adapter-checkpoints-to-hugging-face-format) for full usage details and examples.

 ## Optimizations

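
Since the adapter-only mode targets vLLM's LoRA support, serving might look roughly as follows; this is a sketch under assumed defaults, using `Qwen/Qwen2.5-7B` only as an illustrative base model and the adapter directory produced by `--adapter-only`:

```python
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Load the base model with LoRA adapter support enabled.
llm = LLM(model="Qwen/Qwen2.5-7B", enable_lora=True)

# Attach the exported PEFT adapter per request: (name, unique int id, path).
outputs = llm.generate(
    ["Summarize LoRA in one sentence."],
    SamplingParams(max_tokens=64),
    lora_request=LoRARequest("sft_lora", 1, "<output_path_for_hf_adapter>"),
)
print(outputs[0].outputs[0].text)
```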

tests/functional/test_converter_roundtrip.py

Lines changed: 100 additions & 17 deletions
@@ -54,6 +54,7 @@
 _convert_lora_mod = importlib.util.module_from_spec(_spec)
 _spec.loader.exec_module(_convert_lora_mod)
 merge_lora_to_hf = _convert_lora_mod.merge_lora_to_hf
+export_lora_adapter_to_hf = _convert_lora_mod.export_lora_adapter_to_hf


 def create_test_config() -> Dict[str, Any]:
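
For context, the `_spec`/`_convert_lora_mod` pattern above is the standard-library recipe for importing a standalone script as a module. The lines preceding this hunk are not shown, but presumably look like this sketch (the file path is the converter script the test exercises):

```python
import importlib.util

# Load examples/converters/convert_lora_to_hf.py as an importable module.
_spec = importlib.util.spec_from_file_location(
    "convert_lora_to_hf", "examples/converters/convert_lora_to_hf.py"
)
_convert_lora_mod = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_convert_lora_mod)
```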
@@ -374,7 +375,6 @@ def create_megatron_lora_checkpoint(
     model_cfg.fp8_param = False

     peft = LoRA(**peft_cfg)
-    model_cfg.peft = peft
     if hasattr(model_cfg, "finalize"):
         model_cfg.finalize()
     with megatron_cpu_init_context(model_cfg):
@@ -387,22 +387,41 @@ def create_megatron_lora_checkpoint(
     for m in megatron_model:
         m.requires_grad_(False)

-    # Apply a small deterministic perturbation to LoRA weights so the
-    # merge produces something different from the base.
+    # Save the base model first to create the checkpoint directory structure
+    # and write run_config.yaml (which contains the "model" key needed by
+    # load_model_config). Adapter weights are saved separately below.
+    adapter_dir = os.path.join(temp_dir, "lora_adapter_checkpoint")
+    save_megatron_model(megatron_model, adapter_dir)
+    iter_dir = os.path.join(adapter_dir, "iter_0000000")
+
+    # Apply LoRA wrappers (same pattern as merge_lora_to_hf) and perturb
+    # adapter weights so that the merge produces something different from base.
+    megatron_model = peft(megatron_model, training=False)
+    gc.collect()
+
     torch.manual_seed(42)
     for m in megatron_model:
         for name, param in m.named_parameters():
             if "lora_" in name or "adapter" in name:
                 param.data.normal_(0, 0.01)

-    adapter_dir = os.path.join(temp_dir, "lora_adapter_checkpoint")
-    save_megatron_model(megatron_model, adapter_dir)
+    # Save only the adapter weights using dist_checkpointing, which is the
+    # format that merge_lora_to_hf expects to load from adapter_ckpt.
+    from megatron.bridge.training.checkpointing import (
+        _generate_model_state_dict,
+        apply_peft_adapter_filter_to_state_dict,
+    )
+    from megatron.core import dist_checkpointing

-    # save_megatron_model already writes a run_config.yaml with the
-    # "model" key. Merge the peft section into it so that both
+    adapter_sharded_sd = _generate_model_state_dict(megatron_model, {})
+    adapter_sharded_sd = apply_peft_adapter_filter_to_state_dict(
+        adapter_sharded_sd, peft
+    )
+    dist_checkpointing.save(adapter_sharded_sd, iter_dir)
+
+    # Merge the peft section into run_config.yaml so that both
     # load_model_config (needs "model") and the LoRA converter
     # (needs "peft") can find what they expect.
-    iter_dir = os.path.join(adapter_dir, "iter_0000000")
     run_config_path = os.path.join(iter_dir, "run_config.yaml")
     with open(run_config_path) as f:
         run_config = yaml.safe_load(f)
@@ -503,6 +522,17 @@ def main():
         hf_ckpt_path=lora_merged_hf_path,
     )

+    # Step 7d: Export LoRA adapter only in HuggingFace PEFT format
+    print("\n" + "=" * 60)
+    print("STEP 7d: Exporting LoRA adapter only (PEFT format)")
+    print("=" * 60)
+    lora_adapter_hf_path = os.path.join(temp_dir, "lora_adapter_hf")
+    export_lora_adapter_to_hf(
+        adapter_ckpt=lora_adapter_path,
+        hf_model_name=model_name,
+        hf_ckpt_path=lora_adapter_hf_path,
+    )
+
     # Step 8: Load converted models and compare
     print("\n" + "=" * 60)
     print("STEP 8: Loading converted models and comparing")
@@ -570,11 +600,11 @@ def main():
     )
     lora_merged_state_dict = get_model_state_dict(lora_merged_model)

-    lora_keys = set(lora_merged_state_dict.keys())
-    assert lora_keys == set(original_state_dict.keys()), (
+    lora_merged_keys = set(lora_merged_state_dict.keys())
+    assert lora_merged_keys == set(original_state_dict.keys()), (
         f"LoRA merged model key mismatch.\n"
-        f"  Extra: {lora_keys - set(original_state_dict.keys())}\n"
-        f"  Missing: {set(original_state_dict.keys()) - lora_keys}"
+        f"  Extra: {lora_merged_keys - set(original_state_dict.keys())}\n"
+        f"  Missing: {set(original_state_dict.keys()) - lora_merged_keys}"
     )
     print("✓ LoRA merged model has the expected key structure")

@@ -583,9 +613,9 @@ def main():
     any_different = False
     for key in original_state_dict:
         v_orig = original_state_dict[key]
-        v_lora = lora_merged_state_dict[key]
+        v_lora_merged = lora_merged_state_dict[key]
         if isinstance(v_orig, torch.Tensor) and not torch.allclose(
-            v_orig, v_lora, rtol=1e-5, atol=1e-5
+            v_orig, v_lora_merged, rtol=1e-5, atol=1e-5
         ):
             any_different = True
             break
@@ -600,8 +630,59 @@ def main():
     with torch.no_grad():
         lora_output = lora_merged_model(test_input_lora)
     print("✓ LoRA merged model can perform forward pass")
+    # del lora_merged_model
+    gc.collect()

-    del lora_merged_model
+    # Adapter-only (PEFT) export assertions
+    print("Verifying adapter-only PEFT export...")
+    adapter_config_path = os.path.join(lora_adapter_hf_path, "adapter_config.json")
+    assert os.path.exists(adapter_config_path), (
+        f"adapter_config.json not found in {lora_adapter_hf_path}"
+    )
+    weight_candidates = ["adapter_model.safetensors", "adapter_model.bin"]
+    weight_file_found = any(
+        os.path.exists(os.path.join(lora_adapter_hf_path, f))
+        for f in weight_candidates
+    )
+    assert weight_file_found, (
+        f"No adapter weight file found in {lora_adapter_hf_path}. "
+        f"Expected one of: {weight_candidates}"
+    )
+    print(
+        "✓ PEFT adapter directory has expected files (adapter_config.json + weights)"
+    )
+
+    # Forward pass using the already-merged model from Step 7c.
+    test_input_peft = torch.randint(0, 1000, (1, 10))
+    with torch.no_grad():
+        lora_merged_model(test_input_peft)
+    print("✓ LoRA merged model can perform a forward pass")
+
+    # Verify the adapter-only export produces the same merged weights as Step 7c
+    # by calling merge_lora_to_hf again with the same Megatron adapter. This
+    # avoids tied-weight complications from PeftModel.merge_and_unload().
+    adapter_only_merged_hf_path = os.path.join(temp_dir, "adapter_only_merged_hf")
+    merge_lora_to_hf(
+        base_ckpt=megatron_checkpoint_path,
+        adapter_ckpt=lora_adapter_path,
+        hf_model_name=model_name,
+        hf_ckpt_path=adapter_only_merged_hf_path,
+    )
+    adapter_only_merged_model = AutoModelForCausalLM.from_pretrained(
+        adapter_only_merged_hf_path,
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True,
+    )
+    adapter_only_merged_state_dict = get_model_state_dict(adapter_only_merged_model)
+    assert_state_dicts_equal(
+        adapter_only_merged_state_dict,
+        lora_merged_state_dict,
+        "adapter-only export + merge_lora_to_hf (Step 7d)",
+        "lora merged (Step 7c)",
+    )
+    print("✓ adapter-only merge via merge_lora_to_hf matches Step 7c")
+
+    del adapter_only_merged_model, lora_merged_model
     gc.collect()

     # Verify that both converted models have the expected structure
@@ -632,11 +713,13 @@ def main():
         megatron_output = megatron_converted_model(test_input)

     print(
-        "✓ Dtensor V1 and Dtensor V2 DCP, Megatron, and LoRA-merged models can perform forward passes"
+        "✓ Dtensor V1 and Dtensor V2 DCP, Megatron, and LoRA models can perform forward passes"
     )

     print("\n" + "=" * 80)
-    print("✓ ALL TESTS PASSED (DCP v1, DCP v2, Megatron, LoRA merge)!")
+    print(
+        "✓ ALL TESTS PASSED (DCP v1, DCP v2, Megatron, LoRA merge, LoRA adapter-only PEFT)!"
+    )
     print("=" * 80)

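
The Step 7d equivalence check above deliberately re-runs `merge_lora_to_hf` rather than merging the exported PEFT adapter in-process. For orientation, the avoided route would look roughly like this sketch, reusing the test's `model_name` and `lora_adapter_hf_path` variables; as the test comment notes, PEFT's `merge_and_unload()` can introduce tied-weight complications:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Alternative (avoided by the test): fold the exported adapter into the base
# model in-process instead of re-running merge_lora_to_hf.
base = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
peft_model = PeftModel.from_pretrained(base, lora_adapter_hf_path)
merged = peft_model.merge_and_unload()  # folds LoRA deltas into the base weights
```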
