NVIDIA · jenchen13 · May 18, 2026 · kevalmorabia97 · May 20, 2026 · coderabbitai
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "tools/launcher/modules/Megatron-LM"]
 	path = tools/launcher/modules/Megatron-LM
 	url = https://github.com/NVIDIA/Megatron-LM.git
+[submodule "tools/launcher/modules/Megatron-Bridge"]
+	path = tools/launcher/modules/Megatron-Bridge
+	url = https://github.com/NVIDIA-NeMo/Megatron-Bridge.git
diff --git a/tools/launcher/common/megatron_bridge/import/import.sh b/tools/launcher/common/megatron_bridge/import/import.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Megatron-Bridge HF -> Megatron checkpoint import (CPU-capable).
+#
+# Required env: HF_MODEL_ID  (e.g. nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16)
+# Optional env:
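+#   OUTPUT_DIR     Parent dir for the MCore checkpoint (default: cwd).
+#   TORCH_DTYPE    Model dtype for HF load (default: bfloat16).
+#   EXTRA_PIP_DEPS Space-separated extra packages to pip install (with
+#                  --no-build-isolation) before the import (default: none).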
+#
+# Writes MCore checkpoint to ${OUTPUT_DIR}/<basename(HF_MODEL_ID)>-MCore
+#
+# Runs:
+#   python examples/conversion/convert_checkpoints.py import \
+#       --hf-model $HF_MODEL_ID \
+#       --megatron-path $OUTPUT_DIR/<model>-MCore \
+#       --torch-dtype $TORCH_DTYPE
+
+# Abort on the first failing command so a failed install never reaches the import step.
+set -e
+
+# HF_MODEL_ID is the only mandatory input; fail fast (message on stderr) when it is
+# unset or empty.
+if [[ -z "${HF_MODEL_ID}" ]]; then
+    echo "[ERROR] HF_MODEL_ID is required" >&2
+    exit 1
+fi
+
+SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+LAUNCHER_DIR="${SCRIPT_DIR}/../../.."
+BRIDGE_DIR="${LAUNCHER_DIR}/modules/Megatron-Bridge"
+MLM_DIR="${LAUNCHER_DIR}/modules/Megatron-LM"
+
+if ! python -c "import megatron.bridge" 2>/dev/null; then
+    echo "[INFO] Installing megatron-bridge from ${BRIDGE_DIR}"
+    unset PIP_CONSTRAINT
+    pip install -e "${BRIDGE_DIR}"
+fi
-if ! python -c "import megatron.bridge" 2>/dev/null; then
-    echo "[INFO] Installing megatron-bridge from ${BRIDGE_DIR}"
-    unset PIP_CONSTRAINT
-    pip install -e "${BRIDGE_DIR}"
-fi
+echo "[INFO] Installing megatron-bridge from ${BRIDGE_DIR}"
+unset PIP_CONSTRAINT
+pip install -e "${BRIDGE_DIR}"
-if ! python -c "import megatron.bridge" 2>/dev/null; then
-    echo "[INFO] Installing megatron-bridge from ${BRIDGE_DIR}"
-    unset PIP_CONSTRAINT
-    pip install -e "${BRIDGE_DIR}"
-fi
+echo "[INFO] Installing megatron-bridge from ${BRIDGE_DIR}"
+unset PIP_CONSTRAINT
+pip install -e "${BRIDGE_DIR}"
+
+# Optional extra packages, supplied as a space-separated list in EXTRA_PIP_DEPS.
+if [[ -n "${EXTRA_PIP_DEPS}" ]]; then
+    echo "[INFO] Installing extra deps: ${EXTRA_PIP_DEPS}"
+    # NOTE(review): presumably the runtime image exports PIP_CONSTRAINT to pin versions,
+    # which would interfere with these installs — confirm against the container setup.
+    unset PIP_CONSTRAINT
+    # Split on IFS (whitespace by default) so each package becomes its own pip argument.
+    read -r -a _deps <<< "${EXTRA_PIP_DEPS}"
+    # --no-build-isolation: mamba-ssm/causal-conv1d need torch visible at build time.
+    pip install --no-build-isolation "${_deps[@]}"
+fi
+
+# Megatron-Bridge needs newer megatron.core (incl. megatron.core.distributed.fsdp).
+# Prepend local Megatron-LM to PYTHONPATH so its sources shadow installed megatron-core.
+# The ':' separator is added only when PYTHONPATH is already non-empty: a trailing ':'
+# would create an empty entry, which Python resolves to the current working directory.
+export PYTHONPATH="${MLM_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
+
+OUTPUT_DIR="${OUTPUT_DIR:-$(pwd)}"
+MODEL_NAME="$(basename "${HF_MODEL_ID}")"
+MEGATRON_PATH="${OUTPUT_DIR}/${MODEL_NAME}-MCore"
+TORCH_DTYPE="${TORCH_DTYPE:-bfloat16}"
+
+mkdir -p "${OUTPUT_DIR}"
+
+cd "${BRIDGE_DIR}"
+exec python examples/conversion/convert_checkpoints.py import \
+    --hf-model "${HF_MODEL_ID}" \
+    --megatron-path "${MEGATRON_PATH}" \
+    --torch-dtype "${TORCH_DTYPE}"
diff --git a/...uncher/examples/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16/megatron_bridge_import.yaml b/...uncher/examples/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16/megatron_bridge_import.yaml
@@ -0,0 +1,34 @@
+# Megatron-Bridge import for nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16.
+#
+# Imports HF weights to a Megatron-LM checkpoint via AutoBridge.import_ckpt
+# (use_cpu_initialization=True). Uses a single 8xH100 Slurm node — Megatron-Bridge
+# requires at least 1 GPU for NCCL init even with CPU-resident weights.
+#
+# Usage:
+#   export SLURM_HOST=<slurm-host>
+#   export SLURM_ACCOUNT=<your-team>
+#   export SLURM_PARTITION=<gpu-partition>   # default: batch
+#   export SLURM_JOB_DIR=/home/scratch.<user>/experiments
+#   export HF_TOKEN=<your-hf-token>          # gated model
+#   cd tools/launcher
+#   uv run launch.py --yaml examples/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16/megatron_bridge_import.yaml --yes
+
+job_name: Nemotron-3-Super-120B_bridge_import
+pipeline:
+  skip: false
+  allow_to_fail: false
+  note: "HF -> MCore import via Megatron-Bridge (8xH100)"
+
+  task_0:
+    script: common/megatron_bridge/import/import.sh
+    environment:
+      - HF_MODEL_ID: nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16  # required by import.sh
+      - OUTPUT_DIR: /scratchspace/megatron-bridge  # checkpoint goes to <OUTPUT_DIR>/<model basename>-MCore
+      - EXTRA_PIP_DEPS: "mamba-ssm causal-conv1d"  # space-separated; installed with --no-build-isolation
+    slurm_config:
+      _factory_: "slurm_factory"
+      partition: batch
+      nodes: 1
+      ntasks_per_node: 1
+      gpus_per_node: 8
+      time: "04:00:00"
diff --git a/tools/launcher/launch.py b/tools/launcher/launch.py
@@ -61,13 +61,17 @@
         "modules/Megatron-LM/megatron/*",
         "modules/Megatron-LM/examples/*",
         "modules/Megatron-LM/*.py",
+        "modules/Megatron-Bridge/src/*",
+        "modules/Megatron-Bridge/examples/*",
+        "modules/Megatron-Bridge/pyproject.toml",
+        "modules/Megatron-Bridge/README.md",
         "modules/Model-Optimizer/modelopt/*",
         "modules/Model-Optimizer/modelopt_recipes/*",
         "modules/Model-Optimizer/examples/*",
         "examples/*",
         "common/*",
     ],
-    relative_path=[LAUNCHER_DIR] * 8,
+    relative_path=[LAUNCHER_DIR] * 12,
 )
 
 MODELOPT_SRC_PATH = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt")

diff --git a/tools/launcher/modules/Megatron-Bridge b/tools/launcher/modules/Megatron-Bridge
diff --git a/tools/launcher/modules/Megatron-LM b/tools/launcher/modules/Megatron-LM