Commit 3fe7e65
[5615343,5597780,5371126] Upgrade ORT to 1.24 (#928)
## What does this PR do?

**Type of change:** Bug fix

**Overview:** Upgrade ORT to 1.24.x to fix various bugs (5615343, 5597780, 5371126).

TODO: Verify no regressions by bumping ORT once @ajrasane is back

## Usage

See each bug.

## Testing

See each bug.

## Before your PR is "*Ready for review*"

- **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
- **Is this change backward compatible?**: Yes
- **Did you write any new necessary tests?**: No
- **Did you add or update any necessary documentation?**: No
- **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: Yes

## Summary by CodeRabbit

## Bug Fixes

* Upgraded ONNX Runtime to version 1.24.2, addressing multiple reported issues
* Updated system requirements and installation documentation to reflect the new ONNX Runtime version

---------

Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
Signed-off-by: Gwena Cunha <4861122+gcunhase@users.noreply.github.com>
Co-authored-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
1 parent: 4a11486

5 files changed: 26 additions & 3 deletions


CHANGELOG.rst

Lines changed: 4 additions & 0 deletions

@@ -4,6 +4,10 @@ NVIDIA Model Optimizer Changelog (Linux)
 0.43 (2026-03-xx)
 ^^^^^^^^^^^^^^^^^
 
+**Bug Fixes**
+
+- ONNX Runtime dependency upgraded to 1.24 to solve missing graph outputs when using the TensorRT Execution Provider.
+
 **New Features**
 
 - User does not need to manually register MOE modules to cover experts calibration coverage in PTQ workflow.

docs/source/getting_started/_installation_for_Linux.rst

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@ Latest Model Optimizer (``nvidia-modelopt``) currently has the following system
 +-------------------------+-----------------------------+
 | TensorRT-LLM (Optional) | >=1.0 |
 +-------------------------+-----------------------------+
-| ONNX Runtime (Optional) | 1.22 |
+| ONNX Runtime (Optional) | 1.24 |
 +-------------------------+-----------------------------+
 | TensorRT (Optional) | >=10.0 |
 +-------------------------+-----------------------------+

modelopt/onnx/__init__.py

Lines changed: 7 additions & 0 deletions

@@ -16,6 +16,7 @@
 """Model optimization subpackage for onnx."""
 
 import sys
+import warnings
 
 MIN_PYTHON_VERSION = (3, 10)
 
@@ -26,6 +27,12 @@
     raise ImportError(f"{e}\nPlease install optional ``[onnx]`` dependencies.")
 
 
+if sys.version_info < (3, 11):
+    warnings.warn(
+        "`modelopt.onnx` package will drop python<3.11 support in a future release",
+        DeprecationWarning,
+    )
+
 # Check the current Python version
 if sys.version_info < MIN_PYTHON_VERSION:
     logger.warning(
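The version-gated warning added above follows a standard pattern that can be exercised in isolation. The sketch below reproduces it (the `check_python_support` helper name is hypothetical, added only for illustration) and shows how a caller or test can capture the warning with `warnings.catch_warnings` instead of letting it reach stderr.

```python
import sys
import warnings


def check_python_support() -> None:
    # Hypothetical wrapper around the version-gated warning added in this commit.
    if sys.version_info < (3, 11):
        warnings.warn(
            "`modelopt.onnx` package will drop python<3.11 support in a future release",
            DeprecationWarning,
        )


# Record warnings instead of printing them, so the behavior can be asserted on.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")  # DeprecationWarning is filtered out by default
    check_python_support()

fired = any(issubclass(w.category, DeprecationWarning) for w in caught)
# `fired` is True on Python < 3.11 and False otherwise.
```

Note the `simplefilter("always")` line: without it, Python's default filters can suppress `DeprecationWarning` outside of `__main__`, which would make the check appear to do nothing.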

setup.py

Lines changed: 8 additions & 2 deletions

@@ -48,8 +48,14 @@
 "onnx-graphsurgeon",
 "onnx~=1.19.0",
 "onnxconverter-common~=1.16.0",
-"onnxruntime~=1.22.0 ; platform_machine == 'aarch64' or platform_system == 'Darwin'",
-"onnxruntime-gpu~=1.22.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin'",
+# ORT with Python > 3.10
+"onnxruntime~=1.24.2 ; python_version > '3.10' and (platform_machine == 'aarch64' or platform_system == 'Darwin')",  # noqa: E501
+"onnxruntime-gpu~=1.24.2 ; python_version > '3.10' and platform_machine != 'aarch64' and platform_system != 'Darwin' and platform_system != 'Windows'",  # noqa: E501
+# ORT with Python <= 3.10
+"onnxruntime~=1.22.0 ; python_version <= '3.10' and (platform_machine == 'aarch64' or platform_system == 'Darwin')",  # noqa: E501
+"onnxruntime-gpu~=1.22.0 ; python_version <= '3.10' and platform_machine != 'aarch64' and platform_system != 'Darwin' and platform_system != 'Windows'",  # noqa: E501
+# ORT for Windows
+"onnxruntime-gpu==1.22.0; platform_system == 'Windows'",
 "onnxscript",  # For autocast opset conversion and test_onnx_dynamo_export unit test
 "onnxslim>=0.1.76",
 "polygraphy>=0.49.22",
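The new dependency strings above rely on PEP 508 environment markers: pip evaluates the expression after `;` against the installing interpreter and platform, so only one of the onnxruntime pins is ever selected. A minimal sketch with the `packaging` library shows how one of these strings is parsed and evaluated; the environment dicts are illustrative assumptions, not real machines.

```python
from packaging.requirements import Requirement

# One of the new dependency strings from setup.py: a PEP 508 requirement plus marker.
req = Requirement(
    "onnxruntime-gpu~=1.24.2 ; python_version > '3.10' and platform_machine != 'aarch64' "
    "and platform_system != 'Darwin' and platform_system != 'Windows'"
)

# `~=1.24.2` is a compatible-release specifier: it accepts 1.24.x for x >= 2.
in_range = "1.24.5" in req.specifier  # True
too_new = "1.25.0" in req.specifier   # False

# Illustrative environments showing which interpreters pick up the new pin.
linux_py312 = {"python_version": "3.12", "platform_machine": "x86_64", "platform_system": "Linux"}
linux_py310 = {"python_version": "3.10", "platform_machine": "x86_64", "platform_system": "Linux"}

picks_new = req.marker.evaluate(linux_py312)  # True: this entry installs ORT 1.24.x
picks_old = req.marker.evaluate(linux_py310)  # False: the `python_version <= '3.10'` entry applies instead
```

This is why the upgrade can be backward compatible: Python 3.10 users keep ORT 1.22.0 (and Windows stays pinned to `onnxruntime-gpu==1.22.0`), while newer interpreters get 1.24.x.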

tests/unit/onnx/test_quantize_api.py

Lines changed: 6 additions & 0 deletions

@@ -36,6 +36,7 @@
 
 # onnxruntime version that supports opset 22+
 ORT_VERSION_FOR_OPSET_22 = version.parse("1.23.0")
+TORCH_VERSION_FOR_OPSET_22 = version.parse("2.8.0")
 
 
 # Test scenarios: (scenario_name, export_opset_offset, request_opset_offset, expected_opset_offset)
@@ -86,6 +87,11 @@ def test_quantize_opset_handling(
         pytest.skip(
             f"Opset {max_opset} requires onnxruntime >= {ORT_VERSION_FOR_OPSET_22}, have {ort_version}"
         )
+    torch_version = version.parse(torch.__version__)
+    if torch_version < TORCH_VERSION_FOR_OPSET_22:
+        pytest.skip(
+            f"Opset {max_opset} requires torch >= {TORCH_VERSION_FOR_OPSET_22}, have {torch_version}"
+        )
 
     # Setup: create and export model
     model_torch = SimpleMLP()
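The new torch gate mirrors the existing onnxruntime gate: both compare parsed versions with `packaging.version` before skipping. The sketch below isolates that comparison (the `needs_torch_skip` helper is hypothetical, for illustration only) and shows that local version suffixes such as `+cu121`, common in torch builds, still compare correctly under PEP 440.

```python
from packaging import version

# Threshold from the test: opset 22 export paths require torch >= 2.8.0.
TORCH_VERSION_FOR_OPSET_22 = version.parse("2.8.0")


def needs_torch_skip(torch_version_str: str) -> bool:
    # Hypothetical helper mirroring the skip condition added in the test.
    return version.parse(torch_version_str) < TORCH_VERSION_FOR_OPSET_22


skip_old = needs_torch_skip("2.7.1")       # True: the test would be skipped
run_new = needs_torch_skip("2.8.0")        # False: the test runs
run_local = needs_torch_skip("2.8.0+cu121")  # False: a local version still sorts >= 2.8.0
```

Using `version.parse` rather than string comparison matters here: a plain string compare would, for example, order "2.10.0" before "2.8.0".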
