Skip to content

Commit 6d17aa8

Browse files
authored
[QAIRT] Implement QAIRT ORT->Genie workflow (microsoft#2358)
## Describe your changes

Implements a complete QAIRT workflow for converting ONNX Runtime models to Genie-compatible format through three new passes:

- **QairtPreparation**: Executes external preparation scripts to quantize and prepare HuggingFace models for QAIRT, with configurable caching and script parameters.
- **QairtGenAIBuilder**: Converts prepared models using the QAIRT GenAIBuilder API with support for:
  - CPU and HTP backend targets
  - Device-specific optimizations (VTCM size, HVX threads, extended UDMA)
  - Model configurations (sequence lengths, multi-graph, model splits)
- **QairtEncapsulation**: Wraps QAIRT DLC models in ONNX protobuf format with EPContext nodes, generating genai_config.json for onnxruntime-genai compatibility.

This enables end-to-end optimization of generative AI models for Qualcomm hardware accelerators.

## Checklist before requesting a review

- [x] Add unit tests for this change.
- [x] Make sure all tests can pass.
- [x] Update documents if necessary.
- [x] Lint and apply fixes to your code by running `lintrunner -a`
- [x] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.

## (Optional) Issue link
1 parent 10e7834 commit 6d17aa8

14 files changed

Lines changed: 2897 additions & 1 deletion

olive/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class Framework(StrEnumBase):
1515

1616
ONNX = "ONNX"
1717
PYTORCH = "PyTorch"
18+
QAIRT = "QAIRT"
1819
QNN = "QNN"
1920
TENSORFLOW = "TensorFlow"
2021
OPENVINO = "OpenVINO"
@@ -30,6 +31,8 @@ class ModelFileFormat(StrEnumBase):
3031
PYTORCH_SLICE_GPT_MODEL = "PyTorch.SliceGPT"
3132
TENSORFLOW_PROTOBUF = "TensorFlow.Protobuf"
3233
TENSORFLOW_SAVED_MODEL = "TensorFlow.SavedModel"
34+
QAIRT = "QAIRT"
35+
QAIRT_PREPARED = "QAIRT.Prepared"
3336
QNN_CPP = "QNN.CPP"
3437
QNN_LIB = "QNN.LIB"
3538
QNN_SERIALIZED_BIN = "QNN.SERIALIZED.BIN"

olive/model/handler/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
1010
from olive.model.handler.openvino import OpenVINOModelHandler
1111
from olive.model.handler.pytorch import PyTorchModelHandler
12+
from olive.model.handler.qairt import QairtModelHandler, QairtPreparedModelHandler
1213
from olive.model.handler.qnn import QNNModelHandler
1314
from olive.model.handler.tensorflow import TensorFlowModelHandler
1415

@@ -23,5 +24,7 @@
2324
"OpenVINOModelHandler",
2425
"PyTorchModelHandler",
2526
"QNNModelHandler",
27+
"QairtModelHandler",
28+
"QairtPreparedModelHandler",
2629
"TensorFlowModelHandler",
2730
]

olive/model/handler/qairt.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
3+
# SPDX-License-Identifier: MIT
4+
# --------------------------------------------------------------------------
5+
6+
import logging
7+
from typing import Any, Callable, Optional, Union
8+
9+
from olive.constants import Framework, ModelFileFormat
10+
from olive.hardware.accelerator import Device
11+
from olive.model.config import IoConfig
12+
from olive.model.config.registry import model_handler_registry
13+
from olive.model.handler.base import OliveModelHandler
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
@model_handler_registry("QairtPreparedModel")
class QairtPreparedModelHandler(OliveModelHandler):
    """Handler for a model that has been prepared for QAIRT (ModelFileFormat.QAIRT_PREPARED).

    This is an intermediate artifact handler: the model is consumed by downstream
    QAIRT passes rather than executed directly, so loading and session creation
    are intentionally unsupported.
    """

    # Keys serialized into the Olive JSON model config for this handler.
    json_config_keys: tuple[str, ...] = ("io_config", "model_file_format")

    def __init__(
        self,
        model_path: str,
        model_attributes: Optional[dict[str, Any]] = None,
        io_config: Union[dict[str, Any], IoConfig, str, Callable] = None,
        model_file_format: ModelFileFormat = ModelFileFormat.QAIRT_PREPARED,
    ):
        """Initialize the handler.

        :param model_path: Path to the prepared model artifact.
        :param model_attributes: Optional free-form model attributes.
        :param io_config: Model input/output configuration (dict, IoConfig, str, or callable).
        :param model_file_format: Defaults to ModelFileFormat.QAIRT_PREPARED.
        """
        super().__init__(
            framework=Framework.QAIRT,
            model_file_format=model_file_format,
            model_path=model_path,
            model_attributes=model_attributes,
            io_config=io_config,
        )

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk."""
        # NOTE(review): returns 0 unconditionally — presumably a placeholder
        # until a real on-disk measurement is implemented; confirm intent.
        return 0

    def load_model(self, rank: int = None, cache_model: bool = True):
        """Not supported for prepared QAIRT models."""
        raise NotImplementedError("QairtPreparedModelHandler does not support load_model")

    def prepare_session(
        self,
        inference_settings: Union[dict[str, Any], None] = None,
        device: Device = Device.CPU,
        execution_providers: Union[str, list[str]] = None,
        rank: Union[int, None] = None,
    ):
        """Not supported for prepared QAIRT models."""
        raise NotImplementedError("QairtPreparedModelHandler does not support prepare_session")

    def run_session(
        self,
        session: Any = None,
        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
        **kwargs: dict[str, Any],
    ) -> Any:
        """Not supported for prepared QAIRT models."""
        # Fixed: the message previously said "prepare_session" (copy-paste error).
        raise NotImplementedError("QairtPreparedModelHandler does not support run_session")
61+
62+
63+
@model_handler_registry("QairtModel")
class QairtModelHandler(OliveModelHandler):
    """Handler for a QAIRT model (ModelFileFormat.QAIRT).

    The model is an artifact produced/consumed by QAIRT passes rather than one
    that Olive executes directly, so loading and session creation are
    intentionally unsupported.
    """

    # Keys serialized into the Olive JSON model config for this handler.
    json_config_keys: tuple[str, ...] = ("io_config", "model_file_format")

    def __init__(
        self,
        model_path: str,
        model_attributes: Optional[dict[str, Any]] = None,
        io_config: Union[dict[str, Any], IoConfig, str, Callable] = None,
        model_file_format: ModelFileFormat = ModelFileFormat.QAIRT,
    ):
        """Initialize the handler.

        :param model_path: Path to the QAIRT model artifact.
        :param model_attributes: Optional free-form model attributes.
        :param io_config: Model input/output configuration (dict, IoConfig, str, or callable).
        :param model_file_format: Defaults to ModelFileFormat.QAIRT.
        """
        super().__init__(
            framework=Framework.QAIRT,
            model_file_format=model_file_format,
            model_path=model_path,
            model_attributes=model_attributes,
            io_config=io_config,
        )

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk."""
        # NOTE(review): returns 0 unconditionally — presumably a placeholder
        # until a real on-disk measurement is implemented; confirm intent.
        return 0

    def load_model(self, rank: int = None, cache_model: bool = True):
        """Not supported for QAIRT models."""
        raise NotImplementedError("QairtModelHandler does not support load_model")

    def prepare_session(
        self,
        inference_settings: Union[dict[str, Any], None] = None,
        device: Device = Device.CPU,
        execution_providers: Union[str, list[str]] = None,
        rank: Union[int, None] = None,
    ):
        """Not supported for QAIRT models."""
        raise NotImplementedError("QairtModelHandler does not support prepare_session")

    def run_session(
        self,
        session: Any = None,
        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
        **kwargs: dict[str, Any],
    ) -> Any:
        """Not supported for QAIRT models."""
        # Fixed: the message previously said "prepare_session" (copy-paste error).
        raise NotImplementedError("QairtModelHandler does not support run_session")

olive/olive_config.json

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,13 +473,41 @@
473473
"supported_algorithms": [ ],
474474
"supported_quantization_encodings": [ ]
475475
},
476+
"QairtEncapsulation": {
477+
"module_path": "olive.passes.qairt.encapsulation.QairtEncapsulation",
478+
"supported_providers": [ "QNNExecutionProvider" ],
479+
"supported_accelerators": [ "npu" ],
480+
"supported_precisions": [ "*" ],
481+
"supported_algorithms": [ ],
482+
"supported_quantization_encodings": [ ],
483+
"extra_dependencies": [ "qairt-dev" ]
484+
},
485+
"QairtGenAIBuilder": {
486+
"module_path": "olive.passes.qairt.gen_ai_builder.QairtGenAIBuilder",
487+
"supported_providers": [ "QNNExecutionProvider" ],
488+
"supported_accelerators": [ "npu" ],
489+
"supported_precisions": [ "*" ],
490+
"supported_algorithms": [ ],
491+
"supported_quantization_encodings": [ ],
492+
"extra_dependencies": [ "qairt-dev" ]
493+
},
476494
"QairtMHA2SHA": {
477495
"module_path": "olive.passes.onnx.qairt.mha2sha.QairtMHA2SHA",
478496
"supported_providers": [ "QNNExecutionProvider" ],
479497
"supported_accelerators": [ "npu" ],
480498
"supported_precisions": [ "*" ],
481499
"supported_algorithms": [ ],
482-
"supported_quantization_encodings": [ ]
500+
"supported_quantization_encodings": [ ],
501+
"extra_dependencies": [ "qairt-dev" ]
502+
},
503+
"QairtPreparation": {
504+
"module_path": "olive.passes.qairt.preparation.QairtPreparation",
505+
"supported_providers": [ "QNNExecutionProvider" ],
506+
"supported_accelerators": [ "npu" ],
507+
"supported_precisions": [ "*" ],
508+
"supported_algorithms": [ ],
509+
"supported_quantization_encodings": [ ],
510+
"extra_dependencies": [ "qairt-dev" ]
483511
},
484512
"QLoRA": {
485513
"module_path": "olive.passes.pytorch.lora.QLoRA",
@@ -661,6 +689,7 @@
661689
"onnxruntime-openvino"
662690
],
663691
"optimum": [ "optimum" ],
692+
"qairt": [ "qairt-dev[onnx]" ],
664693
"qnn": [ "onnxruntime-qnn" ],
665694
"tf": [ "tensorflow==1.15.0" ],
666695
"torch-tensorrt": [ "torch-tensorrt" ],

olive/passes/qairt/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
3+
# SPDX-License-Identifier: MIT
4+
# --------------------------------------------------------------------------

0 commit comments

Comments
 (0)