Expose calibration_data_reader to the public interface to allow users to create their own iterator

dthienan-nv · dthienan-nv · commit 8f79948976ed · 2026-03-20T12:24:09.000-04:00
Signed-off-by: dmoodie <dmoodie@nvidia.com>
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,5 +1,12 @@
 NVIDIA Model Optimizer Changelog
 ================================
+0.44 (2026-04-xx)
+^^^^^^^^^^^^^^^^^
+
+**New Features**
+
+- Added iterator interface using CalibrationDataReader in ONNX quantization workflow.
+
 
 0.44 (2026-05-xx)
 ^^^^^^^^^^^^^^^^^
diff --git a/modelopt/onnx/quantization/quantize.py b/modelopt/onnx/quantization/quantize.py
@@ -43,6 +43,7 @@
 import onnx.onnx_cpp2py_export.checker as C
 import onnx_graphsurgeon as gs
 import onnxslim
+from onnxruntime.quantization.calibrate import CalibrationDataReader
 
 from modelopt.onnx.logging_config import configure_logging, logger
 from modelopt.onnx.op_types import is_data_dependent_shape_op
@@ -305,6 +306,7 @@ def quantize(
     calibration_data: CalibrationDataType = None,
     calibration_method: str | None = None,
     calibration_cache_path: str | None = None,
+    calibration_data_reader: CalibrationDataReader | None = None,
     calibration_shapes: str | None = None,
     calibration_eps: list[str] = ["cpu", "cuda:0", "trt"],
     override_shapes: str | None = None,
@@ -361,6 +363,8 @@ def quantize(
             and int4: {'awq_clip' (default), 'awq_lite', 'awq_full', 'rtn_dq'}.
         calibration_cache_path:
             Path to pre-calculated activation tensor ranges, also known as calibration cache.
+        calibration_data_reader:
+            Instance of a CalibrationDataReader object to provide calibration data.
         calibration_shapes:
             Input shapes used for calibration process.
             It should be provided as a string representing the shape of each input tensors for one calibration step.
@@ -571,13 +575,14 @@ def quantize(
     )
     trt_plugins = update_trt_ep_support(calibration_eps, has_dds_op, has_custom_op, trt_plugins)  # type: ignore[arg-type]
 
-    # Use random scales if calibration data is not supplied
-    if calibration_data is None:
-        calibration_data_reader = RandomDataProvider(onnx_path, calibration_shapes)
-    else:
-        calibration_data_reader = CalibrationDataProvider(
-            onnx_path, calibration_data, calibration_shapes
-        )
+    if calibration_data_reader is None:
+        # Use random scales if calibration data is not supplied
+        if calibration_data is None:
+            calibration_data_reader = RandomDataProvider(onnx_path, calibration_shapes)
+        else:
+            calibration_data_reader = CalibrationDataProvider(
+                onnx_path, calibration_data, calibration_shapes
+            )
 
     nodes_to_quantize = nodes_to_quantize or []
     nodes_to_exclude = nodes_to_exclude or []
diff --git a/tests/unit/onnx/quantization/test_quantize_int8.py b/tests/unit/onnx/quantization/test_quantize_int8.py
@@ -20,6 +20,7 @@
 import pytest
 import torch
 from _test_utils.onnx.lib_test_models import SimpleMLP, export_as_onnx
+from onnxruntime.quantization.calibrate import CalibrationDataReader
 
 import modelopt.onnx.quantization as moq
 
@@ -34,14 +35,15 @@ def assert_nodes_are_quantized(nodes):
     return True
 
 
-@pytest.mark.parametrize("high_precision_dtype", ["fp32", "fp16", "bf16"])
-def test_int8(tmp_path, high_precision_dtype):
+def int8_test_helper(tmp_path, high_precision_dtype, **kwargs):
     model_torch = SimpleMLP()
     input_tensor = torch.randn(2, 16, 16)
 
     onnx_path = os.path.join(tmp_path, "model.onnx")
     export_as_onnx(model_torch, input_tensor, onnx_filename=onnx_path)
-    moq.quantize(onnx_path, quantize_mode="int8", high_precision_dtype=high_precision_dtype)
+    moq.quantize(
+        onnx_path, quantize_mode="int8", high_precision_dtype=high_precision_dtype, **kwargs
+    )
 
     # Output model should be produced in the same tmp_path
     output_onnx_path = onnx_path.replace(".onnx", ".quant.onnx")
@@ -55,3 +57,45 @@ def test_int8(tmp_path, high_precision_dtype):
     # Check that all MatMul nodes are quantized
     mm_nodes = [n for n in graph.nodes if n.op == "MatMul"]
     assert assert_nodes_are_quantized(mm_nodes)
+
+
+@pytest.mark.parametrize("high_precision_dtype", ["fp32", "fp16", "bf16"])
+def test_int8(tmp_path, high_precision_dtype):
+    int8_test_helper(tmp_path, high_precision_dtype)  # baseline: no calibration kwargs forwarded
+
+
+@pytest.mark.parametrize("high_precision_dtype", ["fp32", "fp16", "bf16"])
+def test_int8_with_calibration_reader(tmp_path, high_precision_dtype):
+    """Quantize to INT8 with calibration data streamed from a custom CalibrationDataReader."""
+    input_tensor = torch.randn(2, 16, 16)
+
+    class ExampleCalibrationDataReader(CalibrationDataReader):
+        def __init__(self, input_data):
+            self.data_list = [{"input": input_data.numpy()}]
+            self.iter = iter(self.data_list)
+            self.get_first_calls = 0  # call counters let the test confirm the reader was consumed
+            self.get_next_calls = 0
+
+        def get_next(self):
+            self.get_next_calls += 1
+            return next(self.iter, None)  # None signals that the data is exhausted
+
+        def get_first(self):
+            self.get_first_calls += 1
+            return self.data_list[0]
+
+        def rewind(self):
+            self.iter = iter(self.data_list)
+
+    calibration_reader = ExampleCalibrationDataReader(input_tensor)
+    int8_test_helper(tmp_path, high_precision_dtype, calibration_data_reader=calibration_reader)
+    assert calibration_reader.get_first_calls > 0 or calibration_reader.get_next_calls > 0
+
+
+@pytest.mark.parametrize("high_precision_dtype", ["fp32", "fp16", "bf16"])
+def test_int8_with_calibration_data(tmp_path, high_precision_dtype):
+    input_tensor = torch.randn(2, 16, 16)
+
+    # Exercise the pre-allocated calibration_data pathway (in-memory dict, no custom reader)
+    calibration_data = {"input": input_tensor.numpy()}
+    int8_test_helper(tmp_path, high_precision_dtype, calibration_data=calibration_data)