Refactor: PR NVIDIA#702

gcunhase · gcunhase · commit 9e00eec51cfb · 2026-03-02T15:31:11.000-05:00
Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
diff --git a/modelopt/onnx/quantization/autotune/common.py b/modelopt/onnx/quantization/autotune/common.py
@@ -531,12 +531,19 @@ def add_pattern_schemes(self, pattern_schemes: PatternSchemes) -> None:
                         else:
                             # Existing scheme is better, skip new one
                             too_similar = True
+                            if scheme.latency_ms < existing_scheme.latency_ms:
+                                # New scheme is better, mark existing for replacement
+                                schemes_to_replace.append(existing_scheme)
                             break
 
                 if existing_to_remove is not None:
                     filtered_schemes.remove(existing_to_remove)
                 if not too_similar:
                     filtered_schemes.append(scheme)
+                elif schemes_to_replace:
+                    for scheme_to_replace in schemes_to_replace:
+                        filtered_schemes.remove(scheme_to_replace)
+                    filtered_schemes.append(scheme)
 
             sorted_schemes = filtered_schemes