Skip to content

Commit 2759ef1

Browse files
authored
Qualcomm AI Engine Direct - Support backend awareness pass infrastructure (pytorch#20012)
### Summary
Introduce a backend-aware pass manager infrastructure for the Qualcomm backend. The monolithic QnnPassManager is refactored into a base class with overridable classmethods (`get_annotation_passes`, `get_export_passes`, `get_preprocess_passes`, etc.), enabling per-backend pass customization through inheritance.

Three backend-specific pass manager subclasses are added:
- HTP / GPU — add `DecomposeReciprocal` (neither backend supports ElementWiseUnary with the reciprocal operation)
- LPAI — add `DecomposeReciprocal` and the new `DecomposeHardsigmoid` pass, and swap `FoldQDQ` for `LpaiFoldQDQ` (preserves I/O Q/DQ to avoid the v6 accuracy drop)

All call sites now use `get_qnn_pass_manager_cls(backend_type)()` to get the correct pass manager instance.

### Test plan
- `python -m pytest backends/qualcomm/tests/test_passes.py::TestPasses::test_decompose_hardsigmoid_backend_aware -xvs`
- `python -m pytest backends/qualcomm/tests/test_passes.py::TestPasses::test_decompose_reciprocal_backend_aware -xvs`
- `python backends/qualcomm/tests/test_qnn_delegate.py TestQNNQuantizedOperator.test_qnn_backend_hardsigmoid -b build-android/ -s d809c87f -m SM8850 --seed 1126 --backend lpai`

cc @cccclai @cbilgin @abhinaykukkadapu
1 parent e0dfec5 commit 2759ef1

29 files changed

Lines changed: 880 additions & 366 deletions

backends/qualcomm/_passes/BUCK

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ fbcode_target(_kind = runtime.python_library,
77
name = "passes",
88
srcs = glob([
99
"*.py",
10+
"backends/**/*.py",
1011
]),
1112
visibility = ["PUBLIC"],
1213
deps = [

backends/qualcomm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from .decompose_fill import DecomposeFill
2525
from .decompose_floor_divide import DecomposeFloorDivide
2626
from .decompose_glu import DecomposeGlu
27+
from .decompose_hardsigmoid import DecomposeHardsigmoid
2728
from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
2829
from .decompose_log_variants import DecomposeLogVariants
2930
from .decompose_maxpool3d import DecomposeMaxPool3d
@@ -84,6 +85,7 @@
8485
DecomposeFill,
8586
DecomposeFloorDivide,
8687
DecomposeGlu,
88+
DecomposeHardsigmoid,
8789
DecomposeLinalgVectorNorm,
8890
DecomposeLogVariants,
8991
DecomposeMaxPool3d,
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from .qnn_gpu_pass_manager import QnnGpuPassManager
8+
9+
# Public names re-exported by this package. Entries must be strings:
# `from <package> import *` raises TypeError when __all__ contains
# non-string items such as the class object itself.
__all__ = [
    "QnnGpuPassManager",
]
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from executorch.backends.qualcomm._passes import DecomposeReciprocal, RemoveRedundancy
8+
from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
9+
10+
11+
class QnnGpuPassManager(QnnPassManager):
    """
    GPU flavour of the QNN pass manager.

    Every pipeline is inherited from QnnPassManager; this subclass registers
    DecomposeReciprocal in the capture and export pipelines and empties the
    annotation pipeline.
    """

    @classmethod
    def get_default_pass_activations(cls):
        """Return the base pass activations with DecomposeReciprocal enabled."""
        # Reciprocal is currently decomposed in the export pipeline, so it no
        # longer shows up at the to_edge stage. The pass is registered here
        # anyway to prepare for the upcoming deprecation of the export pipeline.
        activations = super().get_default_pass_activations()
        activations.append((DecomposeReciprocal, True))
        return activations

    @classmethod
    def get_passes_dependency_for_capture_program(cls):
        """Return the base dependency table with a DecomposeReciprocal entry."""
        # Same rationale as the activation list: Reciprocal is decomposed in the
        # export pipeline today, and this entry is added ahead of that
        # pipeline's deprecation.
        dependency_table = super().get_passes_dependency_for_capture_program()
        dependency_table[DecomposeReciprocal] = [RemoveRedundancy]
        return dependency_table

    @classmethod
    def get_annotation_passes(cls):
        """Return an empty list: no annotation passes run for the GPU backend."""
        # The GPU backend does not support quantized data types, so the
        # annotation pipeline is skipped; the empty list signals a no-op.
        return []

    @classmethod
    def get_export_passes(
        cls,
        convert_linear_to_conv2d: bool = False,
    ):
        """
        Return the export passes with DecomposeReciprocal placed first.

        Args:
            convert_linear_to_conv2d: Forwarded unchanged to the base class.
        """
        # DecomposeReciprocal belongs in the export pipeline because it depends
        # on LiftConstantScalarOperands to lift the scalar operand.
        inherited_passes = super().get_export_passes(convert_linear_to_conv2d)
        return [DecomposeReciprocal, *inherited_passes]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from .qnn_htp_pass_manager import QnnHtpPassManager
8+
9+
# Public names re-exported by this package. Entries must be strings:
# `from <package> import *` raises TypeError when __all__ contains
# non-string items such as the class object itself.
__all__ = [
    "QnnHtpPassManager",
]
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from executorch.backends.qualcomm._passes import DecomposeReciprocal, RemoveRedundancy
8+
from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
9+
10+
11+
class QnnHtpPassManager(QnnPassManager):
    """
    HTP flavour of the QNN pass manager.

    Every pipeline is inherited from QnnPassManager; this subclass registers
    DecomposeReciprocal in the capture, annotation and export pipelines.
    """

    @classmethod
    def get_default_pass_activations(cls):
        """Return the base pass activations with DecomposeReciprocal enabled."""
        # Reciprocal is currently decomposed in the export/annotation pipeline,
        # so it no longer shows up at the to_edge stage. The pass is registered
        # here anyway to prepare for the upcoming deprecation of the export
        # pipeline.
        activations = super().get_default_pass_activations()
        activations.append((DecomposeReciprocal, True))
        return activations

    @classmethod
    def get_passes_dependency_for_capture_program(cls):
        """Return the base dependency table with a DecomposeReciprocal entry."""
        # Same rationale as the activation list: Reciprocal is decomposed in the
        # export/annotation pipeline today, and this entry is added ahead of the
        # export pipeline's deprecation.
        dependency_table = super().get_passes_dependency_for_capture_program()
        dependency_table[DecomposeReciprocal] = [RemoveRedundancy]
        return dependency_table

    @classmethod
    def get_annotation_passes(cls):
        """Return the annotation passes with DecomposeReciprocal placed first."""
        inherited_passes = super().get_annotation_passes()
        return [DecomposeReciprocal, *inherited_passes]

    @classmethod
    def get_export_passes(
        cls,
        convert_linear_to_conv2d: bool = False,
    ):
        """
        Return the export passes with DecomposeReciprocal placed first.

        Args:
            convert_linear_to_conv2d: Forwarded unchanged to the base class.
        """
        # DecomposeReciprocal belongs in the export pipeline because it depends
        # on LiftConstantScalarOperands to lift the scalar operand.
        inherited_passes = super().get_export_passes(convert_linear_to_conv2d)
        return [DecomposeReciprocal, *inherited_passes]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from .fold_qdq import LpaiFoldQDQ
8+
from .qnn_lpai_pass_manager import QnnLpaiPassManager
9+
10+
# Public names re-exported by this package. Entries must be strings:
# `from <package> import *` raises TypeError when __all__ contains
# non-string items such as the class objects themselves.
__all__ = [
    "LpaiFoldQDQ",
    "QnnLpaiPassManager",
]
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import torch
8+
from executorch.backends.qualcomm._passes.fold_qdq import FoldQDQ
9+
from executorch.backends.qualcomm._passes.utils import get_quant_attrs
10+
from executorch.backends.qualcomm.builders.node_visitor import dq_ops
11+
from executorch.backends.qualcomm.builders.utils import (
12+
is_graph_input,
13+
is_graph_output,
14+
is_parameter,
15+
)
16+
from executorch.backends.qualcomm.utils.constants import (
17+
QCOM_BYPASS_NODE,
18+
QCOM_FALLBACK_NODE,
19+
QCOM_QUANT_ATTRS,
20+
QCOM_QUANTIZED_IO,
21+
)
22+
23+
24+
class LpaiFoldQDQ(FoldQDQ):
    """
    FoldQDQ variant used by the LPAI backend.

    LPAI backend v6 shows an accuracy drop on the quantize and dequantize
    operations, so the Q/DQ pair at each model input and output is kept
    rather than folded away.

    Resulting layout:
        input -> q_1 (Fallback) -> dq_1 (Bypass) -> graph -> q_2 (Bypass) -> dq_2 (Fallback) -> output

    q_1 and dq_2 fall back to CPU, while dq_1 and q_2 are bypassed in
    qnn_partition and folded in qnn_preprocess.
    """

    def _preserve_qdq(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
        """
        Tag the Q/DQ nodes that surround graph inputs and outputs.

        Node metadata is updated in place; the body has no explicit return
        statement.

        Args:
            graph_module: Graph whose nodes are inspected and annotated.
        """
        for node in graph_module.graph.nodes:
            # Dequantize nodes fed by parameters & buffers are only marked as
            # bypass (base class logic) and need no further handling.
            if node.target in dq_ops and is_parameter(node.args[0], self.edge_program):
                self._annotate_bypass(node)
                continue

            # Graph input. Inputs already tagged as quantized I/O are left
            # alone: their quantize operation must not fall back.
            if (
                is_graph_input(node, self.edge_program)
                and QCOM_QUANTIZED_IO not in node.meta
            ):
                consumers = list(node.users)
                if consumers:
                    # Only the first user is handled.
                    # NOTE(review): presumably always the quantize node — confirm.
                    quantize_node = consumers[0]
                    quantize_node.meta[QCOM_FALLBACK_NODE] = True
                    # The quantize node is the input of the first node during
                    # operator validation, so it needs quant attrs of its own.
                    quantize_node.meta[QCOM_QUANT_ATTRS] = get_quant_attrs(
                        self.edge_program, quantize_node
                    )
                    # Last positional argument of the quantize node.
                    # NOTE(review): presumably the quantized dtype — confirm.
                    quantized_io_tag = quantize_node.args[-1]
                    quantize_node.meta[QCOM_QUANTIZED_IO] = quantized_io_tag
                    dequantize_node = list(quantize_node.users)[0]
                    # Bypass the dequantize op for graph validation by torch.
                    dequantize_node.meta[QCOM_BYPASS_NODE] = True
                    # Keeps insert_io_qdq.py from inserting another quantize
                    # operator for this input.
                    node.meta[QCOM_QUANTIZED_IO] = quantized_io_tag
                continue

            # Dequantize node that is a graph output. Outputs whose producer is
            # already tagged as quantized I/O are left alone: their dequantize
            # operation must not fall back.
            if (
                is_graph_output(node)
                and node.target in dq_ops
                and QCOM_QUANTIZED_IO not in node.args[0].args[0].meta
            ):
                node.meta[QCOM_FALLBACK_NODE] = True
                quantize_node = node.args[0]
                # Bypass the quantize op for graph validation by torch.
                quantize_node.meta[QCOM_BYPASS_NODE] = True
                producer_node = quantize_node.args[0]
                # Keeps insert_io_qdq.py from inserting another dequantize
                # operator for this output.
                producer_node.meta[QCOM_QUANTIZED_IO] = quantize_node.args[-1]
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from executorch.backends.qualcomm._passes import (
8+
DecomposeHardsigmoid,
9+
DecomposeReciprocal,
10+
FoldQDQ,
11+
RemoveRedundancy,
12+
)
13+
from executorch.backends.qualcomm._passes.backends.lpai.fold_qdq import LpaiFoldQDQ
14+
from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
15+
16+
17+
class QnnLpaiPassManager(QnnPassManager):
    """
    LPAI flavour of the QNN pass manager.

    Every pipeline is inherited from QnnPassManager; this subclass replaces
    FoldQDQ with LpaiFoldQDQ wherever it appears and registers
    DecomposeHardsigmoid and DecomposeReciprocal.
    """

    @classmethod
    def get_default_pass_activations(cls):
        """Return the base activations with FoldQDQ swapped and two passes appended."""
        activations = []
        for pass_cls, enabled in super().get_default_pass_activations():
            if pass_cls is FoldQDQ:
                pass_cls = LpaiFoldQDQ
            activations.append((pass_cls, enabled))
        # Hardsigmoid and Reciprocal are currently decomposed in the
        # export/annotation pipeline, so they no longer show up at the to_edge
        # stage. The passes are registered here anyway to prepare for the
        # upcoming deprecation of the export pipeline.
        activations.append((DecomposeHardsigmoid, True))
        activations.append((DecomposeReciprocal, True))
        return activations

    @classmethod
    def get_passes_dependency_for_capture_program(cls):
        """Return the base dependency table rewritten for the LPAI passes."""
        dependency_table = super().get_passes_dependency_for_capture_program()
        # Replace FoldQDQ with LpaiFoldQDQ, first as a key of the table...
        if FoldQDQ in dependency_table:
            dependency_table[LpaiFoldQDQ] = dependency_table.pop(FoldQDQ)
        # ...then inside every value list. Only existing keys are reassigned,
        # so the dict does not change size while being iterated.
        for pass_cls, listed_passes in dependency_table.items():
            dependency_table[pass_cls] = [
                LpaiFoldQDQ if listed is FoldQDQ else listed
                for listed in listed_passes
            ]
        # Hardsigmoid and Reciprocal are currently decomposed in the
        # export/annotation pipeline; these entries are added ahead of the
        # export pipeline's deprecation.
        dependency_table[DecomposeHardsigmoid] = [RemoveRedundancy]
        dependency_table[DecomposeReciprocal] = [RemoveRedundancy]
        return dependency_table

    @classmethod
    def get_annotation_passes(cls):
        """Return the annotation passes preceded by the two decompose passes."""
        inherited_passes = super().get_annotation_passes()
        return [DecomposeHardsigmoid, DecomposeReciprocal, *inherited_passes]

    @classmethod
    def get_export_passes(
        cls,
        convert_linear_to_conv2d: bool = False,
    ):
        """
        Return the export passes preceded by the two decompose passes.

        Args:
            convert_linear_to_conv2d: Forwarded unchanged to the base class.
        """
        # Both DecomposeHardsigmoid and DecomposeReciprocal belong in the export
        # pipeline because they rely on LiftConstantScalarOperands to lift the
        # scalar operand.
        inherited_passes = super().get_export_passes(convert_linear_to_conv2d)
        return [DecomposeHardsigmoid, DecomposeReciprocal, *inherited_passes]

    @classmethod
    def get_preprocess_passes(cls, use_mha2sha=False):
        """
        Return the preprocess passes with FoldQDQ replaced by LpaiFoldQDQ.

        Args:
            use_mha2sha: Forwarded unchanged to the base class.
        """
        preprocess_passes = []
        for pass_cls in super().get_preprocess_passes(use_mha2sha):
            if pass_cls is FoldQDQ:
                pass_cls = LpaiFoldQDQ
            preprocess_passes.append(pass_cls)
        return preprocess_passes

0 commit comments

Comments
 (0)