diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index aafe955dd0..ae94ef2ab3 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -20,6 +20,7 @@ NVIDIA Model Optimizer Changelog
 - Add ``nvfp4_omlp_only`` quantization format for NVFP4 quantization. This is similar to ``nvfp4_mlp_only`` but also quantizes the output projection layer in attention.
 - ``pass_through_bwd`` in the quantization config is now default to True. Please set it to False if you want to use STE with zeroed outlier gradients for potentially better QAT accuracy.
 - Add :meth:`compute_quantization_mse <modelopt.torch.quantization.model_quant.compute_quantization_mse>` API to measure per-quantizer mean-squared quantization error, with flexible wildcard and callable filtering.
+- **AutoQDQ**: New tool for automated Q/DQ (Quantize/Dequantize) placement optimization for ONNX models. Uses TensorRT latency measurements to choose insertion schemes that minimize inference time. Discovers regions automatically, groups them by structural pattern, and tests multiple Q/DQ schemes per pattern. Supports INT8 and FP8 quantization, pattern cache for warm-start on similar models, checkpoint/resume, and importing patterns from an existing QDQ baseline. CLI: ``python -m modelopt.onnx.quantization.autotune``. See the AutoQDQ guide in the documentation.
 
 **Misc**
 
diff --git a/modelopt/onnx/quantization/autotune/__main__.py b/modelopt/onnx/quantization/autotune/__main__.py
index 877d1a0170..cb7b3c2810 100644
--- a/modelopt/onnx/quantization/autotune/__main__.py
+++ b/modelopt/onnx/quantization/autotune/__main__.py
@@ -27,12 +27,50 @@
 )
 
 DEFAULT_OUTPUT_DIR = "./autotuner_output"
-DEFAULT_NUM_SCHEMES = 30
+DEFAULT_NUM_SCHEMES = 50
 DEFAULT_QUANT_TYPE = "int8"
 DEFAULT_DQ_DTYPE = "float32"
 DEFAULT_TIMING_CACHE = str(Path(tempfile.gettempdir()) / "trtexec_timing.cache")
-DEFAULT_WARMUP_RUNS = 5
-DEFAULT_TIMING_RUNS = 20
+DEFAULT_WARMUP_RUNS = 50
+DEFAULT_TIMING_RUNS = 100
+MODE_PRESETS = {  # keyed by --mode value; read by apply_mode_presets()
+    "quick": {"schemes_per_region": 30, "warmup_runs": 10, "timing_runs": 50},
+    "default": {  # reuses the same constants the parser uses as argument defaults
+        "schemes_per_region": DEFAULT_NUM_SCHEMES,  # applied to args.num_schemes
+        "warmup_runs": DEFAULT_WARMUP_RUNS,
+        "timing_runs": DEFAULT_TIMING_RUNS,
+    },
+    "extensive": {"schemes_per_region": 200, "warmup_runs": 50, "timing_runs": 200},
+}
+
+
+class _StoreWithExplicitFlag(argparse.Action):
+    """Store the parsed value and mark the option as explicitly given via a namespace flag."""
+
+    def __init__(self, explicit_attr: str, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._explicit_attr = explicit_attr
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        for attr_name, attr_value in ((self.dest, values), (self._explicit_attr, True)):
+            setattr(namespace, attr_name, attr_value)
+
+
+def apply_mode_presets(args) -> None:
+    """Copy the ``--mode`` preset into num_schemes, warmup_runs, and timing_runs on ``args``.
+
+    Explicitly set options keep their command-line value, even when it equals the default.
+    """
+    chosen = MODE_PRESETS.get(args.mode)
+    if chosen is None:
+        return
+    for dest, explicit_attr, preset_key in (
+        ("num_schemes", "_explicit_num_schemes", "schemes_per_region"),
+        ("warmup_runs", "_explicit_warmup_runs", "warmup_runs"),
+        ("timing_runs", "_explicit_timing_runs", "timing_runs"),
+    ):
+        if not getattr(args, explicit_attr, False):
+            setattr(args, dest, chosen[preset_key])
 
 
 def validate_file_path(path: str | None, description: str) -> Path | None:
@@ -94,12 +132,15 @@ def run_autotune() -> int:
         - 130: Interrupted by user (Ctrl+C)
     """
     args = _get_autotune_parser().parse_args()
+    apply_mode_presets(args)
     model_path = validate_file_path(args.onnx_path, "Model file")
     validate_file_path(args.qdq_baseline, "QDQ baseline model")
     output_dir = Path(args.output_dir)
 
     log_benchmark_config(args)
     trtexec_args = getattr(args, "trtexec_benchmark_args", None)
+    if trtexec_args and isinstance(trtexec_args, str):
+        trtexec_args = trtexec_args.split()
     benchmark_instance = init_benchmark_instance(
         use_trtexec=args.use_trtexec,
         plugin_libraries=args.plugin_libraries,
@@ -167,6 +208,12 @@ def _get_autotune_parser() -> argparse.ArgumentParser:
   # Basic usage
   python -m modelopt.onnx.quantization.autotune --onnx_path model.onnx
 
+  # Quick mode (fewer schemes and benchmark runs for fast iteration)
+  python -m modelopt.onnx.quantization.autotune --onnx_path model.onnx --mode quick
+
+  # Extensive mode (more schemes and runs for thorough tuning)
+  python -m modelopt.onnx.quantization.autotune --onnx_path model.onnx --mode extensive
+
   # Import patterns from QDQ baseline model
   python -m modelopt.onnx.quantization.autotune \\
       --onnx_path model.onnx --qdq_baseline baseline.onnx
@@ -198,13 +245,26 @@ def _get_autotune_parser() -> argparse.ArgumentParser:
 
     # Autotuning Strategy
     strategy_group = parser.add_argument_group("Autotuning Strategy")
+    strategy_group.add_argument(
+        "--mode",
+        type=str,
+        default="default",
+        choices=["quick", "default", "extensive"],
+        help="Preset for schemes_per_region, warmup_runs, and timing_runs. "
+        "'quick': fewer schemes/runs for fast iteration; "
+        "'default': balanced; "
+        "'extensive': more schemes/runs for thorough tuning. "
+        "Explicit --schemes_per_region, --warmup_runs, --timing_runs override the preset.",
+    )
     strategy_group.add_argument(
         "--schemes_per_region",
         "-s",
         type=int,
         default=DEFAULT_NUM_SCHEMES,
         dest="num_schemes",
-        help=f"Number of schemes to test per region (default: {DEFAULT_NUM_SCHEMES})",
+        action=_StoreWithExplicitFlag,
+        explicit_attr="_explicit_num_schemes",
+        help=f"Schemes per region (default: {DEFAULT_NUM_SCHEMES}; preset from --mode if not set)",
     )
     strategy_group.add_argument(
         "--pattern_cache",
@@ -268,13 +328,17 @@ def _get_autotune_parser() -> argparse.ArgumentParser:
         "--warmup_runs",
         type=int,
         default=DEFAULT_WARMUP_RUNS,
-        help=f"Number of warmup runs (default: {DEFAULT_WARMUP_RUNS})",
+        action=_StoreWithExplicitFlag,
+        explicit_attr="_explicit_warmup_runs",
+        help=f"Number of warmup runs (default: {DEFAULT_WARMUP_RUNS}; preset from --mode applies if not set)",
     )
     trt_group.add_argument(
         "--timing_runs",
         type=int,
         default=DEFAULT_TIMING_RUNS,
-        help=f"Number of timing runs (default: {DEFAULT_TIMING_RUNS})",
+        action=_StoreWithExplicitFlag,
+        explicit_attr="_explicit_timing_runs",
+        help=f"Number of timing runs (default: {DEFAULT_TIMING_RUNS}; preset from --mode applies if not set)",
     )
     trt_group.add_argument(
         "--plugin_libraries",
diff --git a/tests/_test_utils/onnx/quantization/autotune/models.py b/tests/_test_utils/onnx/quantization/autotune/models.py
index db9652e561..fc63f6690b 100644
--- a/tests/_test_utils/onnx/quantization/autotune/models.py
+++ b/tests/_test_utils/onnx/quantization/autotune/models.py
@@ -25,12 +25,19 @@
 
 def _create_simple_conv_onnx_model():
     """Build ONNX model: Input -> Conv -> Relu -> Output (minimal for autotuner tests)."""
-    input_tensor = helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [32, 3, 224, 224])
+    input_tensor = helper.make_tensor_value_info(
+        "input", onnx.TensorProto.FLOAT, [64, 32, 224, 224]
+    )
     output_tensor = helper.make_tensor_value_info(
-        "output", onnx.TensorProto.FLOAT, [32, 64, 224, 224]
+        "output", onnx.TensorProto.FLOAT, [64, 64, 224, 224]
     )
     conv_node = helper.make_node(
-        "Conv", inputs=["input", "conv_weight"], outputs=["conv_out"], name="conv"
+        "Conv",
+        inputs=["input", "conv_weight"],
+        outputs=["conv_out"],
+        name="conv",
+        kernel_shape=[3, 3],
+        pads=[1, 1, 1, 1],
     )
     relu_node = helper.make_node("Relu", inputs=["conv_out"], outputs=["output"], name="relu")
     graph = helper.make_graph(
@@ -40,7 +47,7 @@ def _create_simple_conv_onnx_model():
         [output_tensor],
         initializer=[
             helper.make_tensor(
-                "conv_weight", onnx.TensorProto.FLOAT, [64, 3, 3, 3], [0.1] * (64 * 3 * 3 * 3)
+                "conv_weight", onnx.TensorProto.FLOAT, [64, 32, 3, 3], [0.1] * (64 * 32 * 3 * 3)
             )
         ],
     )
diff --git a/tests/gpu/onnx/quantization/autotune/test_workflow.py b/tests/gpu/onnx/quantization/autotune/test_workflow.py
index b448135acf..8066766a9c 100644
--- a/tests/gpu/onnx/quantization/autotune/test_workflow.py
+++ b/tests/gpu/onnx/quantization/autotune/test_workflow.py
@@ -35,7 +35,6 @@ def simple_conv_model():
     return _test_models._create_simple_conv_onnx_model()
 
 
-@pytest.mark.skip(reason="TODO: Fix test and enable")
 @pytest.mark.parametrize("use_trtexec", [True, False])
 def test_export_quantized_model(use_trtexec, simple_conv_model):
     """Test exporting quantized model with Q/DQ."""
diff --git a/tests/unit/onnx/quantization/autotune/test_autotune_config.py b/tests/unit/onnx/quantization/autotune/test_autotune_config.py
index 9ec99d65d1..98274fbf81 100644
--- a/tests/unit/onnx/quantization/autotune/test_autotune_config.py
+++ b/tests/unit/onnx/quantization/autotune/test_autotune_config.py
@@ -14,11 +14,17 @@
 # limitations under the License.
 
 """
-Tests for the Config class in the autotuner.
+Tests for the Config class and CLI mode presets in the autotuner.
 
-Tests configuration parameter validation and defaults.
+Tests configuration parameter validation, defaults, and CLI --mode preset
+selection and explicit-flag precedence.
 """
 
+from modelopt.onnx.quantization.autotune.__main__ import (
+    MODE_PRESETS,
+    _get_autotune_parser,
+    apply_mode_presets,
+)
 from modelopt.onnx.quantization.autotune.common import Config
 
 
@@ -95,3 +101,96 @@ def test_pattern_cache_params(self):
 
         assert config.pattern_cache_minimum_distance == 3
         assert config.pattern_cache_max_entries_per_pattern == 10
+
+
+class TestModePresets:
+    """Test --mode preset selection and explicit-flag precedence."""
+
+    @staticmethod
+    def _parse_cli(argv):
+        """Parse argv with the autotune CLI parser and apply mode presets."""
+        parser = _get_autotune_parser()
+        args = parser.parse_args(argv)
+        apply_mode_presets(args)
+        return args
+
+    def test_mode_quick_applies_preset_when_no_explicit_flags(self):
+        """With --mode quick and no explicit schemes/warmup/timing, preset values are used."""
+        args = self._parse_cli(["--onnx_path", "model.onnx", "--mode", "quick"])
+        preset = MODE_PRESETS["quick"]
+        assert args.num_schemes == preset["schemes_per_region"]
+        assert args.warmup_runs == preset["warmup_runs"]
+        assert args.timing_runs == preset["timing_runs"]
+
+    def test_mode_default_applies_preset_when_no_explicit_flags(self):
+        """With --mode default and no explicit flags, preset values are used."""
+        args = self._parse_cli(["--onnx_path", "model.onnx", "--mode", "default"])
+        preset = MODE_PRESETS["default"]
+        assert args.num_schemes == preset["schemes_per_region"]
+        assert args.warmup_runs == preset["warmup_runs"]
+        assert args.timing_runs == preset["timing_runs"]
+
+    def test_mode_extensive_applies_preset_when_no_explicit_flags(self):
+        """With --mode extensive and no explicit flags, preset values are used."""
+        args = self._parse_cli(["--onnx_path", "model.onnx", "--mode", "extensive"])
+        preset = MODE_PRESETS["extensive"]
+        assert args.num_schemes == preset["schemes_per_region"]
+        assert args.warmup_runs == preset["warmup_runs"]
+        assert args.timing_runs == preset["timing_runs"]
+
+    def test_explicit_schemes_per_region_overrides_mode_preset(self):
+        """Explicit --schemes_per_region is kept even when it differs from preset."""
+        args = self._parse_cli(
+            ["--onnx_path", "model.onnx", "--mode", "default", "--schemes_per_region", "99"]
+        )
+        assert args.num_schemes == 99
+        assert args.warmup_runs == MODE_PRESETS["default"]["warmup_runs"]
+        assert args.timing_runs == MODE_PRESETS["default"]["timing_runs"]
+
+    def test_explicit_default_value_not_overridden_by_mode(self):
+        """Explicit --schemes_per_region equal to the parser default beats a differing preset."""
+        default = _get_autotune_parser().get_default("num_schemes")
+        assert default != MODE_PRESETS["quick"]["schemes_per_region"]  # else the check is vacuous
+        args = self._parse_cli(["--onnx_path", "model.onnx", "--mode", "quick", "-s", str(default)])
+        assert args.num_schemes == default
+
+    def test_explicit_warmup_runs_overrides_mode_preset(self):
+        """Explicit --warmup_runs is kept and not overridden by preset."""
+        args = self._parse_cli(
+            ["--onnx_path", "model.onnx", "--mode", "extensive", "--warmup_runs", "3"]
+        )
+        assert args.warmup_runs == 3
+        assert args.num_schemes == MODE_PRESETS["extensive"]["schemes_per_region"]
+        assert args.timing_runs == MODE_PRESETS["extensive"]["timing_runs"]
+
+    def test_explicit_timing_runs_overrides_mode_preset(self):
+        """Explicit --timing_runs is kept and not overridden by preset."""
+        args = self._parse_cli(
+            ["--onnx_path", "model.onnx", "--mode", "quick", "--timing_runs", "7"]
+        )
+        assert args.timing_runs == 7
+        assert args.num_schemes == MODE_PRESETS["quick"]["schemes_per_region"]
+        assert args.warmup_runs == MODE_PRESETS["quick"]["warmup_runs"]
+
+    def test_multiple_explicit_overrides_mode_preset(self):
+        """Multiple explicit flags override only their respective preset values."""
+        args = self._parse_cli(
+            [
+                "--onnx_path",
+                "model.onnx",
+                "--mode",
+                "extensive",
+                "--schemes_per_region",
+                "10",
+                "--timing_runs",
+                "5",
+            ]
+        )
+        assert args.num_schemes == 10
+        assert args.timing_runs == 5
+        assert args.warmup_runs == MODE_PRESETS["extensive"]["warmup_runs"]
+
+    def test_short_flag_schemes_per_region_overrides_mode(self):
+        """Short form -s for schemes_per_region is treated as explicit and overrides preset."""
+        args = self._parse_cli(["--onnx_path", "model.onnx", "--mode", "default", "-s", "25"])
+        assert args.num_schemes == 25
diff --git a/tests/unit/onnx/quantization/autotune/test_pattern_cache.py b/tests/unit/onnx/quantization/autotune/test_pattern_cache.py
new file mode 100644
index 0000000000..294501ff03
--- /dev/null
+++ b/tests/unit/onnx/quantization/autotune/test_pattern_cache.py
@@ -0,0 +1,183 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Tests for PatternCache in the autotuner.
+
+Covers pattern cache creation, serialization, YAML round-trip, and scheme management.
+"""
+
+import os
+import tempfile
+
+from modelopt.onnx.quantization.autotune.common import (
+    InsertionScheme,
+    NodeInputInsertionPoint,
+    PatternCache,
+    PatternSchemes,
+)
+from modelopt.onnx.quantization.autotune.region_pattern import RegionPattern
+
+
+class TestPatternCache:
+    """Test PatternCache functionality."""
+
+    @staticmethod
+    def _create_test_pattern(signature: str, size: int = 2):
+        """Create a test RegionPattern."""
+        return RegionPattern(signature=signature, size=size)
+
+    def test_empty_cache_creation(self):
+        """Test creating an empty PatternCache."""
+        cache = PatternCache()
+        assert cache.pattern_schemes is not None
+        assert len(cache.pattern_schemes) == 0
+
+    def test_add_pattern_schemes(self):
+        """Test adding pattern schemes to cache."""
+        cache = PatternCache()
+        pattern = self._create_test_pattern("Conv->Relu")
+        ps = PatternSchemes(pattern=pattern)
+        scheme = InsertionScheme()
+        scheme.latency_ms = 10.0
+        ps.schemes.append(scheme)
+        cache.add_pattern_schemes(ps)
+        assert len(cache.pattern_schemes) == 1
+        assert cache.pattern_schemes[0].pattern_signature == "Conv->Relu"
+
+    def test_multiple_patterns(self):
+        """Test cache with multiple pattern schemes."""
+        cache = PatternCache()
+        pattern_sigs = ["Conv->Relu", "Gemm->Relu", "Conv->Add->Relu"]
+        for pattern_sig in pattern_sigs:
+            pattern = self._create_test_pattern(pattern_sig)
+            ps = PatternSchemes(pattern=pattern)
+            scheme = InsertionScheme()
+            scheme.latency_ms = 10.0 + len(pattern_sig)
+            ps.schemes.append(scheme)
+            cache.add_pattern_schemes(ps)
+        assert len(cache.pattern_schemes) == 3
+        found_patterns = [ps.pattern_signature for ps in cache.pattern_schemes]
+        for pattern_sig in pattern_sigs:
+            assert pattern_sig in found_patterns
+
+    def test_serialization_empty(self):
+        """Test serialization of empty cache."""
+        cache = PatternCache()
+        data = cache.to_dict()
+        assert "pattern_schemes" in data
+        assert len(data["pattern_schemes"]) == 0
+        restored = PatternCache.from_dict(data)
+        assert len(restored.pattern_schemes) == 0
+
+    def test_serialization_with_data(self):
+        """Test serialization with pattern schemes."""
+        cache = PatternCache(minimum_distance=0)
+        pattern = self._create_test_pattern("Conv->Relu")
+        ps = PatternSchemes(pattern=pattern)
+        scheme1 = InsertionScheme()
+        scheme1.node_inputs = [NodeInputInsertionPoint(0, 0)]
+        scheme1.latency_ms = 10.0
+        ps.schemes.append(scheme1)
+        scheme2 = InsertionScheme()
+        scheme2.node_inputs = [
+            NodeInputInsertionPoint(0, 0),
+            NodeInputInsertionPoint(1, 0),
+            NodeInputInsertionPoint(2, 0),
+            NodeInputInsertionPoint(3, 0),
+            NodeInputInsertionPoint(4, 0),
+        ]
+        scheme2.latency_ms = 12.0
+        ps.schemes.append(scheme2)
+        cache.add_pattern_schemes(ps)
+        data = cache.to_dict()
+        restored = PatternCache.from_dict(data)
+        assert len(restored.pattern_schemes) == 1
+        restored_ps = restored.pattern_schemes[0]
+        assert restored_ps.pattern_signature == "Conv->Relu"
+        assert len(restored_ps.schemes) == 2
+        assert restored_ps.best_scheme is not None
+        assert restored_ps.best_scheme.latency_ms == 10.0
+        assert restored_ps.schemes[0].latency_ms == 10.0
+
+    def test_yaml_round_trip(self):
+        """Test saving and loading cache as YAML."""
+        cache = PatternCache()
+        pattern = self._create_test_pattern("Gemm->Relu")
+        ps = PatternSchemes(pattern=pattern)
+        scheme = InsertionScheme()
+        scheme.latency_ms = 15.0
+        ps.schemes.append(scheme)
+        cache.add_pattern_schemes(ps)
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+            yaml_path = f.name
+        try:
+            cache.save(yaml_path)
+            restored = PatternCache.load(yaml_path)
+            assert len(restored.pattern_schemes) == 1
+            assert restored.pattern_schemes[0].pattern_signature == "Gemm->Relu"
+            assert restored.pattern_schemes[0].schemes[0].latency_ms == 15.0
+        finally:
+            if os.path.exists(yaml_path):
+                os.unlink(yaml_path)
+
+    def test_update_cache(self):
+        """Test updating existing pattern in cache (merges schemes)."""
+        cache = PatternCache(minimum_distance=0)
+        pattern1 = self._create_test_pattern("Conv->Relu")
+        ps1 = PatternSchemes(pattern=pattern1)
+        scheme1 = InsertionScheme()
+        scheme1.latency_ms = 10.0
+        ps1.schemes.append(scheme1)
+        cache.add_pattern_schemes(ps1)
+        pattern2 = self._create_test_pattern("Conv->Relu")
+        ps2 = PatternSchemes(pattern=pattern2)
+        scheme2 = InsertionScheme()
+        scheme2.latency_ms = 8.0
+        scheme2.node_inputs = [NodeInputInsertionPoint(0, 0)]
+        ps2.schemes.append(scheme2)
+        cache.add_pattern_schemes(ps2)
+        assert len(cache.pattern_schemes) == 1
+        conv_relu_ps = cache.pattern_schemes[0]
+        assert conv_relu_ps.pattern_signature == "Conv->Relu"
+        assert len(conv_relu_ps.schemes) == 2
+        assert conv_relu_ps.best_scheme is not None
+        assert conv_relu_ps.best_scheme.latency_ms == 8.0
+
+    def test_get_best_scheme(self):
+        """Test retrieving best scheme for a pattern."""
+        cache = PatternCache(minimum_distance=0)
+        pattern = self._create_test_pattern("Conv->Relu")
+        ps = PatternSchemes(pattern=pattern)
+        scheme1 = InsertionScheme()
+        scheme1.node_inputs = [NodeInputInsertionPoint(0, 0)]
+        scheme1.latency_ms = 12.0
+        ps.schemes.append(scheme1)
+        scheme2 = InsertionScheme()
+        scheme2.node_inputs = [NodeInputInsertionPoint(1, 0)]
+        scheme2.latency_ms = 8.0
+        ps.schemes.append(scheme2)
+        scheme3 = InsertionScheme()
+        scheme3.node_inputs = [NodeInputInsertionPoint(2, 0)]
+        scheme3.latency_ms = 10.0
+        ps.schemes.append(scheme3)
+        cache.add_pattern_schemes(ps)
+        conv_relu_ps = cache.pattern_schemes[0]
+        assert conv_relu_ps.pattern_signature == "Conv->Relu"
+        assert len(conv_relu_ps.schemes) == 3
+        best = conv_relu_ps.best_scheme
+        assert best is not None
+        assert best.latency_ms == 8.0
+        latencies = sorted([s.latency_ms for s in conv_relu_ps.schemes])
+        assert latencies == [8.0, 10.0, 12.0]
diff --git a/tests/unit/onnx/quantization/autotune/test_region.py b/tests/unit/onnx/quantization/autotune/test_region.py
index a27b1c98ca..5a733017d9 100644
--- a/tests/unit/onnx/quantization/autotune/test_region.py
+++ b/tests/unit/onnx/quantization/autotune/test_region.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");