Skip to content

Commit b348350

Browse files
committed
Integrate Automated QDQ placement tool - part 3
Signed-off-by: Will Guo <willg@nvidia.com>
1 parent 8c6de51 commit b348350

10 files changed

Lines changed: 3295 additions & 0 deletions

File tree

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Pattern-based Q/DQ autotuning for ONNX models.
17+
18+
Optimizes Q/DQ node placement in ONNX graphs to minimize TensorRT inference latency
19+
using hierarchical region analysis and pattern-based scheme reuse.
20+
"""
21+
22+
from modelopt.onnx.quantization.autotune.common import (
23+
AutotunerError,
24+
AutotunerNotInitializedError,
25+
Config,
26+
InsertionScheme,
27+
InvalidSchemeError,
28+
PatternCache,
29+
PatternSchemes,
30+
Region,
31+
RegionError,
32+
RegionType,
33+
)
34+
35+
from .insertion_points import (
36+
ChildRegionInputInsertionPoint,
37+
NodeInputInsertionPoint,
38+
RegionOutputInsertionPoint,
39+
ResolvedInsertionPoint,
40+
)
41+
from .region_pattern import RegionPattern
42+
from .region_search import CombinedRegionSearch
43+
44+
# Public names of this module: exactly the 16 names brought in by the import
# statements above, listed in alphabetical order. This list is what a
# wildcard import (`from <module> import *`) of this module exports.
__all__ = [
45+
"AutotunerError",
46+
"AutotunerNotInitializedError",
47+
"ChildRegionInputInsertionPoint",
48+
"CombinedRegionSearch",
49+
"Config",
50+
"InsertionScheme",
51+
"InvalidSchemeError",
52+
"NodeInputInsertionPoint",
53+
"PatternCache",
54+
"PatternSchemes",
55+
"Region",
56+
"RegionError",
57+
"RegionOutputInsertionPoint",
58+
"RegionPattern",
59+
"RegionType",
60+
"ResolvedInsertionPoint",
61+
]
Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python3
2+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Command-line interface for ONNX Q/DQ autotuning."""
18+
19+
import sys
20+
21+
from modelopt.onnx.quantization.autotune.cli import run_autotune
22+
23+
# Script entry point: runs only when this file is executed directly (not when
# imported). The value returned by run_autotune() is handed to sys.exit().
if __name__ == "__main__":
    exit_status = run_autotune()
    sys.exit(exit_status)

0 commit comments

Comments
 (0)