resolve comments

willg-nv · willg-nv · commit 7c810fda982b · 2026-03-09T05:58:59.000Z
Signed-off-by: Will Guo <willg@nvidia.com>
diff --git a/modelopt/onnx/quantization/autotune/__main__.py b/modelopt/onnx/quantization/autotune/__main__.py
@@ -27,12 +27,12 @@
 )
 
 DEFAULT_OUTPUT_DIR = "./autotuner_output"
-DEFAULT_NUM_SCHEMES = 30
+DEFAULT_NUM_SCHEMES = 50
 DEFAULT_QUANT_TYPE = "int8"
 DEFAULT_DQ_DTYPE = "float32"
 DEFAULT_TIMING_CACHE = str(Path(tempfile.gettempdir()) / "trtexec_timing.cache")
-DEFAULT_WARMUP_RUNS = 5
-DEFAULT_TIMING_RUNS = 20
+DEFAULT_WARMUP_RUNS = 50
+DEFAULT_TIMING_RUNS = 100
 MODE_PRESETS = {
     "quick": {"schemes_per_region": 30, "warmup_runs": 10, "timing_runs": 50},
     "default": {"schemes_per_region": 50, "warmup_runs": 50, "timing_runs": 100},
@@ -135,6 +135,8 @@ def run_autotune() -> int:
 
     log_benchmark_config(args)
     trtexec_args = getattr(args, "trtexec_benchmark_args", None)
+    if trtexec_args and isinstance(trtexec_args, str):
+        trtexec_args = trtexec_args.split()
     benchmark_instance = init_benchmark_instance(
         use_trtexec=args.use_trtexec,
         plugin_libraries=args.plugin_libraries,
@@ -258,7 +260,7 @@ def _get_autotune_parser() -> argparse.ArgumentParser:
         dest="num_schemes",
         action=_StoreWithExplicitFlag,
         explicit_attr="_explicit_num_schemes",
-        help=f"Number of schemes to test per region (default: {DEFAULT_NUM_SCHEMES}; overridden by --mode)",
+        help=f"Schemes per region (default: {DEFAULT_NUM_SCHEMES}; preset from --mode if not set)",
     )
     strategy_group.add_argument(
         "--pattern_cache",
@@ -324,15 +326,15 @@ def _get_autotune_parser() -> argparse.ArgumentParser:
         default=DEFAULT_WARMUP_RUNS,
         action=_StoreWithExplicitFlag,
         explicit_attr="_explicit_warmup_runs",
-        help=f"Number of warmup runs (default: {DEFAULT_WARMUP_RUNS}; overridden by --mode)",
+        help=f"Number of warmup runs (default: {DEFAULT_WARMUP_RUNS}; preset from --mode applies if not set)",
     )
     trt_group.add_argument(
         "--timing_runs",
         type=int,
         default=DEFAULT_TIMING_RUNS,
         action=_StoreWithExplicitFlag,
         explicit_attr="_explicit_timing_runs",
-        help=f"Number of timing runs (default: {DEFAULT_TIMING_RUNS}; overridden by --mode)",
+        help=f"Number of timing runs (default: {DEFAULT_TIMING_RUNS}; preset from --mode applies if not set)",
     )
     trt_group.add_argument(
         "--plugin_libraries",
diff --git a/tests/_test_utils/onnx/quantization/autotune/models.py b/tests/_test_utils/onnx/quantization/autotune/models.py
@@ -32,7 +32,12 @@ def _create_simple_conv_onnx_model():
         "output", onnx.TensorProto.FLOAT, [64, 64, 224, 224]
     )
     conv_node = helper.make_node(
-        "Conv", inputs=["input", "conv_weight"], outputs=["conv_out"], name="conv"
+        "Conv",
+        inputs=["input", "conv_weight"],
+        outputs=["conv_out"],
+        name="conv",
+        kernel_shape=[3, 3],
+        pads=[1, 1, 1, 1],
     )
     relu_node = helper.make_node("Relu", inputs=["conv_out"], outputs=["output"], name="relu")
     graph = helper.make_graph(

Original file line number	Diff line number	Diff line change
`@@ -32,7 +32,12 @@ def _create_simple_conv_onnx_model():`
`32`	`32`	`"output", onnx.TensorProto.FLOAT, [64, 64, 224, 224]`
`33`	`33`	`)`
`34`	`34`	`conv_node = helper.make_node(`
`35`		`- "Conv", inputs=["input", "conv_weight"], outputs=["conv_out"], name="conv"`
	`35`	`+ "Conv",`
	`36`	`+ inputs=["input", "conv_weight"],`
	`37`	`+ outputs=["conv_out"],`
	`38`	`+ name="conv",`
	`39`	`+ kernel_shape=[3, 3],`
	`40`	`+ pads=[1, 1, 1, 1],`
`36`	`41`	`)`
`37`	`42`	`relu_node = helper.make_node("Relu", inputs=["conv_out"], outputs=["output"], name="relu")`
`38`	`43`	`graph = helper.make_graph(`