File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 5353 export_hf_checkpoint ,
5454 export_tensorrt_llm_checkpoint ,
5555 get_model_type ,
56+ save_expert_token_count_table ,
5657)
5758from modelopt .torch .export .model_utils import get_language_model_from_vl , is_multimodal_model
5859from modelopt .torch .quantization .config import _default_disabled_quantizer_cfg , need_calibration
@@ -727,6 +728,7 @@ def post_quantize(
727728
728729 if args .verbose :
729730 mtq .print_quant_summary (full_model )
731+ save_expert_token_count_table (full_model , args .export_path )
730732
731733 # Run some samples
732734 torch .cuda .empty_cache ()
Original file line number Diff line number Diff line change 1919from .model_config import *
2020from .model_config_export import *
2121from .model_utils import *
22+ from .moe_utils import *
2223from .plugins import *
2324from .transformer_engine import *
2425from .unified_export_hf import *
Original file line number Diff line number Diff line change 7676 QUANTIZATION_W4A8_NVFP4_FP8 ,
7777)
7878from .model_utils import get_language_model_from_vl , is_multimodal_model
79- from .moe_utils import save_expert_token_count_table
8079from .plugins import export_spec_ckpt_config , export_spec_ckpt_state_dict , spec_opt_only
8180from .quant_utils import (
8281 fuse_prequant_layernorm ,
@@ -1004,8 +1003,6 @@ def export_hf_checkpoint(
10041003 try :
10051004 post_state_dict , hf_quant_config = _export_transformers_checkpoint (model , dtype )
10061005
1007- save_expert_token_count_table (model , export_dir )
1008-
10091006 if hf_quant_config is not None :
10101007 # Save hf_quant_config.json for backward compatibility
10111008 with open (f"{ export_dir } /hf_quant_config.json" , "w" ) as file :
You can’t perform that action at this time.
0 commit comments