Skip to content

Commit 8e5632a

Browse files
committed
Qualcomm AI Engine Direct - Support LPAI Backend
Summary: - Add LPAI infra - Because of the accuracy issue with quantize and dequantize operations in LPAI, Q and DQ will fallback to CPU for the LPAI backend. - Support runtime option for `fps`, `ftrt_ratio`, `client_perf_type`, `affinity`, and `core_selection`. Refer to [QNN doc](https://docs.qualcomm.com/doc/80-63442-10/topic/lpai_backend.html#full-json-scheme) to get more details for these options. - Support LPAI in `test_qnn_delegate.py` - Refactor documentation for LPAI and GPU - Add a script to sign the skel library
1 parent ba54389 commit 8e5632a

52 files changed

Lines changed: 2634 additions & 143 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

backends/qualcomm/README.md

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,7 @@ Please check `generate_qnn_executorch_compiler_spec()` in
1616
[utils.py](utils/utils.py) for supported SoC and inference type.
1717

1818
### Supported Chipset
19-
- Snapdragon 8 Gen 1
20-
- Snapdragon 8 Gen 1+
21-
- Snapdragon 8 Gen 2
22-
- Snapdragon 8 Gen 3
23-
- Snapdragon 8 Elite
24-
- Snapdragon 8 Elite Gen 5
25-
- SA8295
26-
- SA8255
27-
- SA8797 (also used by SA8397)
28-
- SSG2115P
29-
- SSG2125P
30-
- SXR1230P
31-
- SXR2230P
32-
- SXR2330P
33-
- QCS9100
34-
- SAR2230P
35-
- SW6100
36-
- SM8845
19+
The supported Qualcomm SoCs are detailed in `QcomChipset`. Please navigate to [qc_schema.py](serialization/qc_schema.py).
3720

3821
### Adding more supported Chipset
3922
Currently, users cannot add additional chipset models because the chipset ID is not accessible to community users. If you have specific chipset models you wish to add, please contact one of the authors in the `Code Reviews` section at the bottom of this page.
@@ -42,6 +25,15 @@ Currently, users cannot add additional chipset models because the chipset ID is
4225
- Quantized
4326
- FP16
4427

28+
### Supported Backend Type
29+
- QNN GPU
30+
- QNN HTP
31+
- QNN LPAI
32+
- Currently, only LPAI Arch v6 is supported, which requires QNN SDK version 2.39 or higher.
33+
- Please check `_soc_info_table` in [qc_schema.py](serialization/qc_schema.py) to find the supported chipsets.
34+
- The skel library needs to be signed for LPAI to work. Please confirm if you are able to sign it. You can use [signed_library.sh](scripts/signed_library.sh) to sign the skel library.
35+
36+
4537
## Directory Structure
4638

4739
```

backends/qualcomm/_passes/fold_qdq.py

Lines changed: 83 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,42 @@
55
# LICENSE file in the root directory of this source tree.
66
import torch
77
from executorch.backends.qualcomm.builders.node_visitor import dq_ops, q_ops
8-
from executorch.backends.qualcomm.builders.utils import is_parameter
9-
from executorch.backends.qualcomm.utils.constants import QCOM_BYPASS_NODE
8+
from executorch.backends.qualcomm.builders.utils import (
9+
is_graph_input,
10+
is_graph_output,
11+
is_parameter,
12+
)
13+
from executorch.backends.qualcomm.serialization.qc_schema import (
14+
QnnExecuTorchBackendType,
15+
)
16+
from executorch.backends.qualcomm.utils.constants import (
17+
QCOM_BYPASS_NODE,
18+
QCOM_FALLBACK_NODE,
19+
QCOM_QUANT_ATTRS,
20+
QCOM_QUANTIZED_IO,
21+
)
1022
from executorch.exir.dialects._ops import ops as exir_ops
1123
from executorch.exir.pass_base import ExportPass, PassResult
1224
from executorch.exir.passes import dead_code_elimination_pass
1325

26+
from .utils import get_quant_attrs
27+
1428

1529
class FoldQDQ(ExportPass):
1630
"""
1731
Erase QDQ pattern.
1832
"""
1933

20-
def __init__(self, edge_program: torch.export.ExportedProgram, force_fold=False):
34+
def __init__(
35+
self,
36+
edge_program: torch.export.ExportedProgram,
37+
force_fold=False,
38+
backend_type: QnnExecuTorchBackendType = QnnExecuTorchBackendType.kHtpBackend,
39+
):
2140
super(FoldQDQ, self).__init__()
2241
self.edge_program = edge_program
2342
self.force_fold = force_fold
43+
self.backend_type = backend_type
2444

2545
def _annotate_bypass(self, node):
2646
node.meta[QCOM_BYPASS_NODE] = True
@@ -35,20 +55,26 @@ def _fold_dq(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
3555
if n.target not in dq_ops:
3656
continue
3757

38-
# skip parameters & buffers
39-
if not self.force_fold and is_parameter(n.args[0], self.edge_program):
40-
self._annotate_bypass(n)
41-
else:
42-
for user_n in user_list:
43-
user_n.replace_input_with(n, n.args[0])
44-
graph_module.graph.erase_node(n)
58+
if not self.force_fold and (
59+
QCOM_BYPASS_NODE in n.meta or QCOM_FALLBACK_NODE in n.meta
60+
):
61+
continue
62+
63+
for user_n in user_list:
64+
user_n.replace_input_with(n, n.args[0])
65+
graph_module.graph.erase_node(n)
4566

4667
def _fold_q(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
4768
# remove q
4869
for n in graph_module.graph.nodes:
4970
if n.target not in q_ops:
5071
continue
5172

73+
if not self.force_fold and (
74+
QCOM_BYPASS_NODE in n.meta or QCOM_FALLBACK_NODE in n.meta
75+
):
76+
continue
77+
5278
to_be_removed = [n]
5379
source_n = n.args[0]
5480

@@ -72,7 +98,54 @@ def _fold_q(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
7298
for n in to_be_removed:
7399
graph_module.graph.erase_node(n)
74100

101+
def _preserve_qdq(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
102+
for n in graph_module.graph.nodes:
103+
# skip parameters & buffers
104+
if n.target in dq_ops and is_parameter(n.args[0], self.edge_program):
105+
self._annotate_bypass(n)
106+
continue
107+
108+
# TODO: In LPAI backend v6, there is an accuracy drop for the quantize and dequantize operations.
109+
# To address this, keep the quantize/dequantize operations at the model's input and output.
110+
# For example, input -> q_1 (Fallback) -> dq_1 (Bypass) -> graph -> q_2 (Bypass) -> dq_2 (Fallback) -> output
111+
# Here, q_1 and dq_2 will fallback to CPU, while q_2 and dq_1 will be bypassed in qnn_partition and folded in qnn_preprocess.
112+
if self.backend_type == QnnExecuTorchBackendType.kLpaiBackend:
113+
if (
114+
is_graph_input(n, self.edge_program)
115+
# For tagged quantized I/O, we should not fallback quantize operation.
116+
and QCOM_QUANTIZED_IO not in n.meta
117+
):
118+
user_list = list(n.users.keys())
119+
if len(user_list) > 0:
120+
q_node = user_list[0]
121+
q_node.meta[QCOM_FALLBACK_NODE] = True
122+
# Annotate the q_node since it will serve as the input for the first node during operator validation
123+
q_node.meta[QCOM_QUANT_ATTRS] = get_quant_attrs(
124+
self.edge_program, q_node
125+
)
126+
q_node.meta[QCOM_QUANTIZED_IO] = q_node.args[-1]
127+
dq_node = list(q_node.users.keys())[0]
128+
# Bypass dequantize op for graph validation by torch
129+
dq_node.meta[QCOM_BYPASS_NODE] = True
130+
# Make sure that the quantize operator isn't inserted for input in insert_io_qdq.py
131+
n.meta[QCOM_QUANTIZED_IO] = q_node.args[-1]
132+
elif (
133+
is_graph_output(n)
134+
and n.target in dq_ops
135+
# For tagged quantized I/O, we should not fallback dequantize operation.
136+
and QCOM_QUANTIZED_IO not in n.args[0].args[0].meta
137+
):
138+
n.meta[QCOM_FALLBACK_NODE] = True
139+
q_node = n.args[0]
140+
# Bypass quantize op for graph validation by torch
141+
q_node.meta[QCOM_BYPASS_NODE] = True
142+
op_node = q_node.args[0]
143+
# Make sure that the dequantize operator isn't inserted for output in insert_io_qdq.py
144+
op_node.meta[QCOM_QUANTIZED_IO] = q_node.args[-1]
145+
75146
def call(self, graph_module: torch.fx.GraphModule):
147+
if not self.force_fold:
148+
self._preserve_qdq(graph_module)
76149
self._fold_dq(graph_module)
77150
self._fold_q(graph_module)
78151
graph_module.recompile()

backends/qualcomm/_passes/qnn_pass_manager.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@
5959
from executorch.backends.qualcomm._passes.utils import (
6060
get_passes_dependency_for_capture_program,
6161
)
62+
from executorch.backends.qualcomm.serialization.qc_schema import (
63+
QnnExecuTorchBackendType,
64+
)
6265
from executorch.backends.qualcomm.utils.constants import (
6366
QCOM_PASS_ACTIVATE_KEY,
6467
QCOM_PASS_ARGS_KWARGS_DEFAULTS_KEY,
@@ -144,6 +147,7 @@ def get_to_edge_transform_passes(
144147
exported_program: ExportedProgram,
145148
passes_job: OrderedDict = None,
146149
dep_table: Dict = None,
150+
backend_type: QnnExecuTorchBackendType = QnnExecuTorchBackendType.kHtpBackend,
147151
):
148152
# TODO: remove this workaround when target could be correctly detected
149153
from executorch.backends.qualcomm.builders import node_visitor
@@ -176,6 +180,8 @@ def get_to_edge_transform_passes(
176180
kwargs = passes_job[p][QCOM_PASS_ARGS_KWARGS_DEFAULTS_KEY]
177181
if "edge_program" in kwargs:
178182
kwargs["edge_program"] = exported_program
183+
if "backend_type" in kwargs:
184+
kwargs["backend_type"] = backend_type
179185
self.add_pass(p(**kwargs))
180186
assert isinstance(
181187
self.passes[-1], ResolveDebugHandle

backends/qualcomm/_passes/utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ def get_passes_dependency_for_capture_program():
6464
AnnotateQuantAttrs,
6565
AnnotateStack,
6666
AnnotateUnbind,
67-
CanonicalizeConv,
6867
ConvertBmmToMatmul,
6968
DecomposeAny,
7069
DecomposeColIm,
@@ -103,7 +102,6 @@ def get_passes_dependency_for_capture_program():
103102
I64toI32: [RemoveRedundancy],
104103
LayoutTransform: [
105104
AnnotateQuantAttrs,
106-
CanonicalizeConv,
107105
ExpandBroadcastTensorShape,
108106
FixedLinearKeepDim,
109107
],

backends/qualcomm/partition/qnn_partitioner.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,16 @@
1212
from executorch.backends.qualcomm.builders import node_visitor_manager
1313
from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
1414
from executorch.backends.qualcomm.qnn_preprocess import QnnBackend
15+
from executorch.backends.qualcomm.serialization.qc_schema import (
16+
QnnExecuTorchBackendType,
17+
)
1518
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
1619
flatbuffer_to_option,
1720
)
18-
from executorch.backends.qualcomm.utils.constants import QCOM_BYPASS_NODE
21+
from executorch.backends.qualcomm.utils.constants import (
22+
QCOM_BYPASS_NODE,
23+
QCOM_FALLBACK_NODE,
24+
)
1925

2026
from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import (
2127
get_current_qnn_manager,
@@ -78,6 +84,9 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool:
7884
)
7985
return False
8086

87+
if node.meta.get(QCOM_FALLBACK_NODE, False):
88+
return False
89+
8190
if (
8291
node.target in allow_list_operator
8392
# bypass if custom op appears
@@ -138,6 +147,9 @@ def __init__(
138147
skip_mutable_buffer (bool, optional): If True, mutable buffers are not delegated to QNN.
139148
"""
140149
self.compiler_specs_snapshot = copy.deepcopy(compiler_specs)
150+
self.backend = flatbuffer_to_option(
151+
generate_qnn_executorch_option(self.compiler_specs_snapshot)
152+
).backend_options.backend_type
141153

142154
self.delegation_spec = DelegationSpec(
143155
QnnBackend.__name__, self.compiler_specs_snapshot
@@ -188,14 +200,34 @@ def tag_nodes(
188200
# since they will all be removed in following stage
189201
node.meta["delegation_tag"] = delegation_tag
190202

203+
@staticmethod
204+
def check_partitions(
205+
backend: QnnExecuTorchBackendType, partitions: List[Any]
206+
) -> bool:
207+
pl = len(partitions)
208+
if backend == QnnExecuTorchBackendType.kLpaiBackend:
209+
# By default, there are two partitions that are always created in LPAI backend:
210+
# one for the quantized and one for the dequantized.
211+
bypass_nodes = [
212+
node
213+
for partition in partitions
214+
for node in partition.nodes
215+
if node.meta.get(QCOM_BYPASS_NODE, False)
216+
]
217+
pl -= len(bypass_nodes)
218+
if pl == 0:
219+
logging.warning("Nothing can be partitioned!")
220+
else:
221+
logging.info(f"Found {pl} subgraphs to be partitioned.")
222+
return pl != 0
223+
191224
# override
192225
def partition(self, edge_program: torch.export.ExportedProgram) -> PartitionResult:
193226
# Generate partitions by QNN op_support checker
194227
partitions = self.generate_partitions(edge_program)
195228
del self.op_support_checker
196-
197229
# If partitions are found, handle tagging of nodes, constant data, and mutated buffers for delegation
198-
if len(partitions) != 0:
230+
if self.check_partitions(self.backend, partitions):
199231
self.tag_nodes(partitions, edge_program)
200232
tag_constant_data(edge_program)
201233
if not self.skip_mutable_buffer:

0 commit comments

Comments
 (0)