address cr

galagam · galagam · commit fc4d5414259d · 2026-01-27T07:10:42.000-08:00
Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com>
diff --git a/modelopt/onnx/autocast/convert.py b/modelopt/onnx/autocast/convert.py
@@ -85,7 +85,7 @@ def convert_to_mixed_precision(
         trt_plugins_precision: List indicating the precision for each custom op.
         max_depth_of_reduction: Maximum depth of reduction for node classification.
         opset: Target ONNX opset version. If None, uses default minimum opset based on low_precision_type
-               (22 for bf16, 13 for fp16). The opset may be automatically increased if certain operations
+               (22 for bf16, 19 for fp16). The opset may be automatically increased if certain operations
                require a higher version.
         use_standalone_type_inference: If True, use standalone type inference implementation instead of ONNX's
                                   infer_shapes. This is a workaround (WAR) when only type inference is
diff --git a/modelopt/onnx/quantization/quantize.py b/modelopt/onnx/quantization/quantize.py
@@ -80,6 +80,17 @@
 __all__ = ["quantize"]
 
 
+def _normalize_quantize_mode_for_opset(quantize_mode: str) -> str:
+    """Map a quantize mode variant to its base precision ("int4_awq" -> "int4", "nvfp4" -> "float4_e2m1fn")."""
+    mode_lower = quantize_mode.lower()  # lowercase copy, used only for the substring checks below
+    if "int4" in mode_lower:  # substring test, so "int4_awq" and "int4_rtn" both land here
+        return "int4"
+    if "nvfp4" in mode_lower or "float4" in mode_lower:
+        return "float4_e2m1fn"
+    # For "int8", "fp8", etc., return the input as-is, casing included (fp8 is expected to fall back to BASE_MIN_OPSET)
+    return quantize_mode
+
+
 def _preprocess_onnx(
     onnx_path: str,
     use_external_data_format: bool,
@@ -126,7 +137,9 @@ def _preprocess_onnx(
     original_opset_version = get_opset_version(onnx_model)
 
     # Determine minimum required opset based on quantization mode
-    mode_min_opset = QDQ_PRECISION_MIN_OPSET.get(quantize_mode, BASE_MIN_OPSET)
+    # Normalize quantize_mode to handle variants like "int4_awq", "nvfp4", etc.
+    normalized_mode = _normalize_quantize_mode_for_opset(quantize_mode)
+    mode_min_opset = QDQ_PRECISION_MIN_OPSET.get(normalized_mode, BASE_MIN_OPSET)
 
     # Determine target opset version
     if opset is not None:
diff --git a/setup.py b/setup.py
@@ -48,8 +48,8 @@
         "onnx-graphsurgeon",
         "onnx~=1.19.0",
         "onnxconverter-common~=1.16.0",
-        "onnxruntime~=1.23.0 ; platform_machine == 'aarch64' or platform_system == 'Darwin'",
-        "onnxruntime-gpu~=1.23.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin'",
+        "onnxruntime~=1.22.0 ; platform_machine == 'aarch64' or platform_system == 'Darwin'",
+        "onnxruntime-gpu~=1.22.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin'",
         "onnxscript",  # For autocast opset conversion and test_onnx_dynamo_export unit test
         "onnxslim>=0.1.76",
         "polygraphy>=0.49.22",