Skip to content

Commit 4b4ef63

Browse files
committed
Update
Signed-off-by: Chenjie Luo <chenjiel@nvidia.com>
1 parent 2e29ee7 commit 4b4ef63

3 files changed

Lines changed: 3 additions & 3 deletions

File tree

examples/llm_ptq/hf_ptq.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
export_hf_checkpoint,
5454
export_tensorrt_llm_checkpoint,
5555
get_model_type,
56+
save_expert_token_count_table,
5657
)
5758
from modelopt.torch.export.model_utils import get_language_model_from_vl, is_multimodal_model
5859
from modelopt.torch.quantization.config import _default_disabled_quantizer_cfg, need_calibration
@@ -727,6 +728,7 @@ def post_quantize(
727728

728729
if args.verbose:
729730
mtq.print_quant_summary(full_model)
731+
save_expert_token_count_table(full_model, args.export_path)
730732

731733
# Run some samples
732734
torch.cuda.empty_cache()

modelopt/torch/export/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from .model_config import *
2020
from .model_config_export import *
2121
from .model_utils import *
22+
from .moe_utils import *
2223
from .plugins import *
2324
from .transformer_engine import *
2425
from .unified_export_hf import *

modelopt/torch/export/unified_export_hf.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@
7676
QUANTIZATION_W4A8_NVFP4_FP8,
7777
)
7878
from .model_utils import get_language_model_from_vl, is_multimodal_model
79-
from .moe_utils import save_expert_token_count_table
8079
from .plugins import export_spec_ckpt_config, export_spec_ckpt_state_dict, spec_opt_only
8180
from .quant_utils import (
8281
fuse_prequant_layernorm,
@@ -1004,8 +1003,6 @@ def export_hf_checkpoint(
10041003
try:
10051004
post_state_dict, hf_quant_config = _export_transformers_checkpoint(model, dtype)
10061005

1007-
save_expert_token_count_table(model, export_dir)
1008-
10091006
if hf_quant_config is not None:
10101007
# Save hf_quant_config.json for backward compatibility
10111008
with open(f"{export_dir}/hf_quant_config.json", "w") as file:

0 commit comments

Comments (0)