Skip to content

Commit 35e6099

Browse files
authored
Integrate Automated QDQ autotuner - part 3.2 (#838)
## What does this PR do? This PR implements the QDQAutotuner class. This class is used to drive the main Autotuner workflow. The workflow is: 1. use RegionSearch to build regions 2. generate QDQ ONNX models and evaluate their performance 3. save the best model. This PR is part 2/4 of #703. PR 3.1: #837 PR 3.2: #838 PR 3.3: #839 **Overview:** ? ## Testing <!-- Mention how you have tested your change if applicable. --> ## Before your PR is "*Ready for review*" <!-- If you haven't finished some of the above items you can still open a `Draft` PR. --> - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed. - **Is this change backward compatible?**: Yes - **Did you write any new necessary tests?**: Not in this part. - **Did you add or update any necessary documentation?**: No, the documentation will be updated in part 4. - **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: No, the changelog will be updated when all changes are ready. ## Additional Information <!-- E.g. related issue. --> <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Introduced ONNX Q/DQ autotuning framework with automatic region discovery and pattern-based optimization. * Added model profiling and quantization scheme generation capabilities. * Enabled state persistence and quantization model export functionality. * Introduced configuration management for quantization parameters and profiling workflows. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub> <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Will Guo <willg@nvidia.com>
1 parent a415667 commit 35e6099

7 files changed

Lines changed: 2298 additions & 6 deletions

File tree

modelopt/onnx/quantization/autotune/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@
2121
"""
2222

2323
# Core data structures
24+
from .autotuner import QDQAutotuner
2425
from .benchmark import TensorRTPyBenchmark, TrtExecBenchmark
2526
from .common import (
2627
AutotunerError,
2728
AutotunerNotInitializedError,
29+
Config,
2830
InsertionScheme,
2931
InvalidSchemeError,
32+
PatternCache,
33+
PatternSchemes,
3034
Region,
3135
RegionType,
3236
)
@@ -45,9 +49,13 @@
4549
"ChildRegionInputInsertionPoint",
4650
"ChildRegionOutputInsertionPoint",
4751
"CombinedRegionSearch",
52+
"Config",
4853
"InsertionScheme",
4954
"InvalidSchemeError",
5055
"NodeInputInsertionPoint",
56+
"PatternCache",
57+
"PatternSchemes",
58+
"QDQAutotuner",
5159
"Region",
5260
"RegionPattern",
5361
"RegionType",
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Automatic Q/DQ insertion optimization for ONNX models via pattern-based profiling."""
17+
18+
from collections import Counter, deque
19+
20+
from modelopt.onnx.logging_config import logger
21+
from modelopt.onnx.quantization.autotune.autotuner_base import QDQAutotunerBase
22+
from modelopt.onnx.quantization.autotune.common import Config, PatternCache, Region, RegionType
23+
from modelopt.onnx.quantization.autotune.region_search import CombinedRegionSearch
24+
25+
26+
class QDQAutotuner(QDQAutotunerBase):
    """Q/DQ autotuner that discovers its optimization regions on its own.

    ``initialize`` performs the base-class setup and then runs a
    ``CombinedRegionSearch`` over ``self.graph``. The discovered hierarchy is
    flattened and stored in ``self.regions`` with LEAF regions placed first.
    """

    def initialize(
        self, config: Config | None = None, pattern_cache: PatternCache | None = None
    ) -> None:
        """Run the base-class initialization, then search for regions.

        Args:
            config: Forwarded unchanged to ``QDQAutotunerBase.initialize``.
            pattern_cache: Forwarded unchanged to ``QDQAutotunerBase.initialize``.
        """
        super().initialize(config, pattern_cache)
        self._search_regions()

    @staticmethod
    def _visit_region_recursively(region: Region) -> list[Region]:
        """Flatten the subtree rooted at ``region`` into a pre-order list.

        The walk is depth-first and driven by an explicit stack: each region is
        emitted before any of its descendants, and siblings keep the order in
        which ``get_children()`` reports them.

        Args:
            region: Root of the subtree to flatten.

        Returns:
            ``region`` followed by all of its descendants, in pre-order DFS.
        """
        ordered: list[Region] = []
        pending = [region]

        while pending:
            current = pending.pop()
            ordered.append(current)
            # Push children right-to-left so the first child is popped next,
            # which keeps the left-to-right pre-order of the hierarchy.
            pending.extend(reversed(list(current.get_children())))

        return ordered

    def _reassign_region_ids(self, regions: list[Region]) -> None:
        """Number every region sequentially from 0 in breadth-first order.

        All of the given top-level regions are numbered first, then their
        children, then grandchildren, and so on.

        Args:
            regions: Top-level regions; their descendants are renumbered too.
        """

        def breadth_first():
            # Yield each region before asking it for children, so the caller
            # assigns the id prior to the children being enqueued.
            frontier = deque(regions)
            while frontier:
                current = frontier.popleft()
                yield current
                frontier.extend(current.get_children())

        for new_id, current in enumerate(breadth_first()):
            current.id = new_id

    def _search_regions(self) -> None:
        """Populate ``self.regions`` with the regions to be profiled.

        Steps:
            1. Run ``CombinedRegionSearch`` on ``self.graph`` using the size
               limits from ``self.config``.
            2. Renumber the resulting hierarchy breadth-first.
            3. Flatten the hierarchy into one list (pre-order per top-level
               region).
            4. Move LEAF regions to the front, keeping relative order.

        Note:
            ``CombinedRegionSearch`` is described by this module's author as a
            bottom-up partitioning pass (based on divergence/convergence)
            followed by a top-down refinement pass that builds the hierarchy.
        """
        logger.info("Discovering optimization regions")
        searcher = CombinedRegionSearch(
            self.graph,
            maximum_sequence_region_size=self.config.maximum_sequence_region_size,
            minimum_topdown_search_size=self.config.minimum_topdown_search_size,
        )
        self.regions = searcher.search_regions()
        self._reassign_region_ids(self.regions)
        logger.debug(f"Found {len(self.regions)} top-level regions")

        # Expand each top-level region into its complete subtree.
        flattened = [
            member
            for top_level in self.regions
            for member in QDQAutotuner._visit_region_recursively(top_level)
        ]

        # The key is False for LEAF and True otherwise; sorted() is stable, so
        # LEAF regions come first and each group keeps its traversal order.
        self.regions = sorted(flattened, key=lambda r: r.type != RegionType.LEAF)

        type_counts = Counter(r.type for r in self.regions)
        logger.info(
            f"Discovery complete: {len(self.regions)} regions "
            f"({type_counts[RegionType.LEAF]} LEAF, {type_counts[RegionType.COMPOSITE]} COMPOSITE, "
            f"{type_counts[RegionType.ROOT]} ROOT)"
        )
        logger.debug("Regions prioritized: LEAF regions first for profiling")

0 commit comments

Comments
 (0)