Commit 4012727

Update documentation for TRT support in Autotune: 10.16 -> 10.15

Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
1 parent b6c6ec3 commit 4012727

4 files changed: 35 additions & 9 deletions

File tree

docs/source/guides/9_autotune.rst

Lines changed: 26 additions & 0 deletions
@@ -221,6 +221,32 @@ If the model uses custom TensorRT operations, provide the plugin libraries:
        --output_dir ./results \
        --plugin_libraries /path/to/plugin1.so /path/to/plugin2.so
 
+Remote Autotuning
+-----------------------
+
+TensorRT 10.15+ supports remote autotuning in safety mode (``--safe``), which allows TensorRT's optimization process to be offloaded to remote hardware. This is useful when optimizing models for different target GPUs without direct access to them.
+
+To use remote autotuning during Q/DQ placement optimization, run with ``trtexec`` and pass extra args:
+
+.. code-block:: bash
+
+    python -m modelopt.onnx.quantization.autotune \
+        --onnx_path resnet50_Opset17_bs128.onnx \
+        --output_dir ./resnet50_remote_autotuned \
+        --schemes_per_region 50 \
+        --use_trtexec \
+        --trtexec_benchmark_args "--remoteAutoTuningConfig=\"<remote autotuning config>\" --safe --skipInference"
+
+**Requirements:**
+
+* TensorRT 10.15 or later
+* Valid remote autotuning configuration
+* ``--use_trtexec`` must be set (benchmarking uses ``trtexec`` instead of the TensorRT Python API)
+* ``--safe --skipInference`` must be enabled via ``--trtexec_benchmark_args``
+
+Replace ``<remote autotuning config>`` with an actual remote autotuning configuration string (see ``trtexec --help`` for more details).
+Other TensorRT benchmark options (e.g. ``--timing_cache``, ``--warmup_runs``, ``--timing_runs``, ``--plugin_libraries``) are also available; run ``--help`` for details.
 
 Low-Level API Usage
 ===================
 
examples/onnx_ptq/autotune/README.md

Lines changed: 4 additions & 4 deletions
@@ -229,7 +229,7 @@ python3 -m modelopt.onnx.quantization.autotune \
 
 ## Remote Autotuning with TensorRT
 
-TensorRT 10.16+ supports remote autotuning in safety mode (`--safe`), which allows TensorRT's optimization process to be offloaded to a remote hardware. This is useful when optimizing models for different target GPUs without having direct access to them.
+TensorRT 10.15+ supports remote autotuning in safety mode (`--safe`), which allows TensorRT's optimization process to be offloaded to remote hardware. This is useful when optimizing models for different target GPUs without direct access to them.
 
 To use remote autotuning during Q/DQ placement optimization, run with `trtexec` and pass extra args:
 
@@ -239,15 +239,15 @@ python3 -m modelopt.onnx.quantization.autotune \
   --output_dir ./resnet50_remote_autotuned \
   --schemes_per_region 50 \
   --use_trtexec \
-  --trtexec_benchmark_args "--remoteAutoTuningConfig=\"<remote autotuning config>\" --safe"
+  --trtexec_benchmark_args "--remoteAutoTuningConfig=\"<remote autotuning config>\" --safe --skipInference"
 ```
 
 **Requirements:**
 
-- TensorRT 10.16 or later
+- TensorRT 10.15 or later
 - Valid remote autotuning configuration
 - `--use_trtexec` must be set (benchmarking uses `trtexec` instead of the TensorRT Python API)
-- `--safe` must be enabled via `--trtexec_benchmark_args`
+- `--safe --skipInference` must be enabled via `--trtexec_benchmark_args`
 
 Replace `<remote autotuning config>` with an actual remote autotuning configuration string (see `trtexec --help` for more details).
 Other TensorRT benchmark options (e.g. `--timing_cache`, `--warmup_runs`, `--timing_runs`, `--plugin_libraries`) are also available; run `--help` for details.
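The nested quoting in `--trtexec_benchmark_args` is easy to get wrong. A quick way to check how the shell resolves it before running the real tool (a sketch using `printf`; the config value stays a placeholder):

```shell
# Print the argument exactly as the Python CLI would receive it: the outer
# double quotes group everything into one string, and the escaped inner
# quotes survive for trtexec's --remoteAutoTuningConfig value.
printf '%s\n' "--remoteAutoTuningConfig=\"<remote autotuning config>\" --safe --skipInference"
```

If the printed string is not a single line with intact inner quotes, the escaping will not survive the trip into `trtexec` either.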

modelopt/onnx/quantization/__main__.py

Lines changed: 2 additions & 2 deletions
@@ -398,8 +398,8 @@ def get_parser() -> argparse.ArgumentParser:
         type=str,
         default=None,
         help=(
-            "Additional trtexec arguments as a single quoted string. "
-            "Example: --autotune_trtexec_args '--fp16 --workspace=4096'"
+            "Additional 'trtexec' arguments as a single quoted string. Only relevant with the 'trtexec' workflow "
+            "enabled. Example (simple): '--fp16 --workspace=4096'"
         ),
     )
     return argparser
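The help text above describes a flag that accepts all extra `trtexec` arguments as one quoted string. A minimal standalone sketch of that argparse pattern (the flag name and example values here are assumed for illustration, not taken from the module):

```python
import argparse

# Minimal sketch: one string-typed flag collects all extra trtexec arguments.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--trtexec_benchmark_args",
    type=str,
    default=None,
    help="Additional 'trtexec' arguments as a single quoted string.",
)

# The whole quoted string arrives as one value and can be split downstream.
ns = parser.parse_args(["--trtexec_benchmark_args", "--fp16 --workspace=4096"])
print(ns.trtexec_benchmark_args.split())  # ['--fp16', '--workspace=4096']
```

Keeping the extra arguments as a single string sidesteps argparse trying to interpret `--fp16` and friends as its own flags.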

modelopt/onnx/quantization/autotune/benchmark.py

Lines changed: 3 additions & 3 deletions
@@ -208,8 +208,8 @@ def __init__(
 
         if has_remote_config:
             try:
-                _check_for_tensorrt(min_version="10.16")
-                self.logger.debug("TensorRT Python API version >= 10.16 detected")
+                _check_for_tensorrt(min_version="10.15")
+                self.logger.debug("TensorRT Python API version >= 10.15 detected")
                 if "--safe" not in trtexec_args:
                     self.logger.warning(
                         "Remote autotuning requires '--safe' to be set. Adding it to trtexec arguments."
@@ -218,7 +218,7 @@ def __init__(
                 return
             except ImportError:
                 self.logger.warning(
-                    "Remote autotuning is not supported with TensorRT version < 10.16. "
+                    "Remote autotuning is not supported with TensorRT version < 10.15. "
                     "Removing --remoteAutoTuningConfig from trtexec arguments"
                 )
                 trtexec_args = [
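The logic above degrades gracefully when the installed TensorRT is too old: remote autotuning args are kept only when the version check passes, and `--remoteAutoTuningConfig` is stripped otherwise. A simplified, self-contained sketch of that gate (the function name and signature are illustrative, not the module's API):

```python
# Illustrative sketch of the version gate in the diff above: keep remote
# autotuning args only when TensorRT is new enough, and make sure --safe
# accompanies them; otherwise drop --remoteAutoTuningConfig entirely.
def gate_remote_args(trtexec_args, trt_version, min_version=(10, 15)):
    installed = tuple(int(p) for p in trt_version.split("."))
    if installed >= min_version:
        if "--safe" not in trtexec_args:
            # Mirrors the warning path: remote autotuning requires --safe.
            trtexec_args = [*trtexec_args, "--safe"]
        return trtexec_args
    # Too old: strip the unsupported flag, keep everything else.
    return [a for a in trtexec_args if not a.startswith("--remoteAutoTuningConfig")]
```

Silently repairing or stripping the flag (with a warning) lets the rest of the benchmarking flow proceed on older TensorRT installs instead of failing outright.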
