diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 452f365387..e615627b23 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -13,6 +13,7 @@ NVIDIA Model Optimizer Changelog (Linux)
 - Add standalone type inference option (``--use_standalone_type_inference``) in ONNX AutoCast as an alternative to ONNX's ``infer_shapes``. This experimental feature performs type-only inference without shape inference, useful as a workaround when shape inference fails or to avoid unnecessary shape inference overhead.
 - Add support for Kimi K2 Thinking model quantization from the original int4 checkpoint.
 - Add support for ``params`` constraint based automatic neural architecture search in Minitron pruning (``mcore_minitron``) as an alternative to manual pruning (using ``export_config``). See `examples/pruning/README.md `_ for more details on its usage.
+- Add support for calibration data with multiple samples in ``npz`` format in the ONNX AutoCast workflow.
 
 0.41 (2026-01-19)
 ^^^^^^^^^^^^^^^^^
diff --git a/modelopt/onnx/autocast/referencerunner.py b/modelopt/onnx/autocast/referencerunner.py
index 8dc91ff089..4d4f9a987c 100644
--- a/modelopt/onnx/autocast/referencerunner.py
+++ b/modelopt/onnx/autocast/referencerunner.py
@@ -30,6 +30,7 @@
 import onnx
 
 from modelopt.onnx.autocast.logging_config import configure_logging, logger
+from modelopt.onnx.quantization.calib_utils import CalibrationDataProvider
 from modelopt.onnx.quantization.ort_utils import _prepare_ep_list
 
 configure_logging()
@@ -70,7 +71,13 @@ def _load_inputs_from_json(self, input_data_path):
 
     def _load_inputs_from_npz(self, input_data_path):
         """Load inputs from NPZ format."""
-        return [np.load(input_data_path)]
+        calib_data = np.load(input_data_path)
+
+        if isinstance(calib_data, np.lib.npyio.NpzFile):
+            # Wrap data into a CalibrationDataProvider to support a single NPZ file containing data from multiple batches
+            data_loader = {key: calib_data[key] for key in calib_data.files}
+            return CalibrationDataProvider(self.model, data_loader).calibration_data_list
+        return [calib_data]
 
     def _validate_inputs(self, data_loader):
         """Validate that input names and shapes match the model."""
diff --git a/modelopt/onnx/quantization/calib_utils.py b/modelopt/onnx/quantization/calib_utils.py
index e8a538e61c..56e0d4cc01 100644
--- a/modelopt/onnx/quantization/calib_utils.py
+++ b/modelopt/onnx/quantization/calib_utils.py
@@ -38,7 +38,7 @@ class CalibrationDataProvider(CalibrationDataReader):
 
     def __init__(
         self,
-        onnx_path: str,
+        onnx_path: str | onnx.ModelProto,
         calibration_data: CalibrationDataType,
         calibration_shapes: str | None = None,
     ):
@@ -58,7 +58,7 @@ def __init__(
         logger.info("Setting up CalibrationDataProvider for calibration")
         # Tensor data is not required to generate the calibration data
         # So even if the model has external data, we don't need to load them here
-        onnx_model = onnx.load(onnx_path)
+        onnx_model = onnx.load(onnx_path) if isinstance(onnx_path, str) else onnx_path
         input_names = get_input_names(onnx_model)
         input_shapes = {} if calibration_shapes is None else parse_shapes_spec(calibration_shapes)
         inferred_input_shapes = get_input_shapes(onnx_model)