File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -694,6 +694,18 @@ def _export_transformers_checkpoint(
694694
695695 quant_config = get_quant_config (model , is_modelopt_qlora = is_modelopt_qlora )
696696
697+ # Add MTP layer prefixes to exclude_modules if they were excluded from quantization
698+ # This ensures they appear in quantization_config["ignore"] in config.json
699+ mtp_layer_prefixes = getattr (model , "_mtp_layer_prefixes" , None )
700+ if mtp_layer_prefixes :
701+ exclude_modules = quant_config ["quantization" ].setdefault ("exclude_modules" , [])
702+ for prefix in mtp_layer_prefixes :
703+ # Add wildcard pattern to exclude all submodules under this MTP layer
704+ pattern = f"{ prefix } *"
705+ if pattern not in exclude_modules :
706+ exclude_modules .append (pattern )
707+ print (f"Adding MTP layer to quantization_config ignore: { pattern } " )
708+
697709 # Process all quantized modules and export weights
698710 _process_quantized_modules (model , dtype , is_modelopt_qlora )
699711
You can’t perform that action at this time.
0 commit comments