Skip to content

Commit 39c6195

Browse files
committed
add MTP modules in excluded/ignore modules in config
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
1 parent 2774a2c commit 39c6195

1 file changed

Lines changed: 12 additions & 0 deletions

File tree

modelopt/torch/export/unified_export_hf.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,18 @@ def _export_transformers_checkpoint(
694694

695695
quant_config = get_quant_config(model, is_modelopt_qlora=is_modelopt_qlora)
696696

697+
# Add MTP layer prefixes to exclude_modules if they were excluded from quantization
698+
# This ensures they appear in quantization_config["ignore"] in config.json
699+
mtp_layer_prefixes = getattr(model, "_mtp_layer_prefixes", None)
700+
if mtp_layer_prefixes:
701+
exclude_modules = quant_config["quantization"].setdefault("exclude_modules", [])
702+
for prefix in mtp_layer_prefixes:
703+
# Add wildcard pattern to exclude all submodules under this MTP layer
704+
pattern = f"{prefix}*"
705+
if pattern not in exclude_modules:
706+
exclude_modules.append(pattern)
707+
print(f"Adding MTP layer to quantization_config ignore: {pattern}")
708+
697709
# Process all quantized modules and export weights
698710
_process_quantized_modules(model, dtype, is_modelopt_qlora)
699711

0 commit comments

Comments (0)