Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ NVIDIA Model Optimizer Changelog (Linux)
- Add standalone type inference option (``--use_standalone_type_inference``) in ONNX AutoCast as an alternative to ONNX's ``infer_shapes``. This experimental feature performs type-only inference without shape inference, useful as a workaround when shape inference fails or to avoid unnecessary shape inference overhead.
- Add support for Kimi K2 Thinking model quantization from the original int4 checkpoint.
- Add support for ``params`` constraint based automatic neural architecture search in Minitron pruning (``mcore_minitron``) as an alternative to manual pruning (using ``export_config``). See `examples/pruning/README.md <https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/pruning>`_ for more details on its usage.
- Add support for calibration data with multiple samples in ``npz`` format in the ONNX AutoCast workflow.

0.41 (2026-01-19)
^^^^^^^^^^^^^^^^^
Expand Down
9 changes: 8 additions & 1 deletion modelopt/onnx/autocast/referencerunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import onnx

from modelopt.onnx.autocast.logging_config import configure_logging, logger
from modelopt.onnx.quantization.calib_utils import CalibrationDataProvider
from modelopt.onnx.quantization.ort_utils import _prepare_ep_list

configure_logging()
Expand Down Expand Up @@ -70,7 +71,13 @@ def _load_inputs_from_json(self, input_data_path):

def _load_inputs_from_npz(self, input_data_path):
"""Load inputs from NPZ format."""
# NOTE(review): this is a flattened diff hunk ("8 additions & 1 deletion"); the next
# line is presumably the pre-change return that the PR removes, and the lines after
# it are its replacement. Confirm against the merged file.
return [np.load(input_data_path)]
calib_data = np.load(input_data_path)

if isinstance(calib_data, np.lib.npyio.NpzFile):
# Wrap the arrays in a CalibrationDataProvider so that a single NPZ file can carry data for multiple batches.
# Every array named in calib_data.files is read into a plain dict before being handed over.
data_loader = {key: calib_data[key] for key in calib_data.files}
return CalibrationDataProvider(self.model, data_loader).calibration_data_list
# Anything np.load returned that is not an NpzFile is passed through as a single-item list.
return [calib_data]

def _validate_inputs(self, data_loader):
"""Validate that input names and shapes match the model."""
Expand Down
4 changes: 2 additions & 2 deletions modelopt/onnx/quantization/calib_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class CalibrationDataProvider(CalibrationDataReader):

def __init__(
self,
onnx_path: str,
onnx_path: str | onnx.ModelProto,
calibration_data: CalibrationDataType,
calibration_shapes: str | None = None,
):
Expand All @@ -58,7 +58,7 @@ def __init__(
logger.info("Setting up CalibrationDataProvider for calibration")
# Tensor data is not required to generate the calibration data,
# so even if the model has external data, we don't need to load it here
onnx_model = onnx.load(onnx_path)
onnx_model = onnx.load(onnx_path) if isinstance(onnx_path, str) else onnx_path
input_names = get_input_names(onnx_model)
input_shapes = {} if calibration_shapes is None else parse_shapes_spec(calibration_shapes)
inferred_input_shapes = get_input_shapes(onnx_model)
Expand Down