Skip to content

Commit e61a691

Browse files
committed
Qualcomm AI Engine Direct - Fixed Conv2d + PReLU fusion issue
Summary: - For fusion, the PReLU coefficient must be a scalar (1x1x1x1) or have shape (1x1x1xd); QNN broadcasts it to match the input. - Add test cases that check the fusion succeeds. Test Plan: ``` python backends/qualcomm/tests/test_qnn_delegate.py TestQNNQuantizedModel.test_qnn_backend_conv2d_leaky_relu_fusion -H {HOST} -s {SERIAL} -m SM8750 -b build-android -a /path/to/executorch_artifacts TestQNNQuantizedModel.test_qnn_backend_conv2d_relu_fusion -H {HOST} -s {SERIAL} -m SM8750 -b build-android -a /path/to/executorch_artifacts TestQNNQuantizedModel.test_qnn_backend_linear_leaky_relu_fusion -H {HOST} -s {SERIAL} -m SM8750 -b build-android -a /path/to/executorch_artifacts ```
1 parent 490ec5c commit e61a691

3 files changed

Lines changed: 168 additions & 14 deletions

File tree

backends/qualcomm/builders/op_prelu.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager
99

1010
import torch
11-
from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER
1211

1312
from .node_visitor import get_parameter, NodeVisitor
1413
from .node_visitor_manager import register_node_visitor
@@ -38,19 +37,8 @@ def define_node(
3837
)
3938

4039
coeff_node = self.get_node(node.args[1])
41-
coeff = get_parameter(coeff_node, self.edge_program)
42-
coeff_tensor = torch.zeros(input_node.meta["val"].shape, dtype=coeff.dtype)
43-
# per-channel activation
44-
coeff_node_shape = coeff_node.meta["val"].shape
45-
if len(coeff_node_shape) and coeff_node_shape[0] > 1:
46-
for i in range(input_node.meta["val"].shape[1]):
47-
coeff_tensor = coeff_tensor.index_fill(1, torch.tensor([i]), coeff[i])
48-
else:
49-
coeff_tensor.fill_(coeff[0] if coeff.dim() else coeff)
50-
51-
if axis_order := input_node.meta.get(QCOM_AXIS_ORDER, None):
52-
coeff_tensor = coeff_tensor.permute(dims=axis_order).contiguous()
53-
40+
coeff_tensor = get_parameter(coeff_node, self.edge_program)
41+
# QNN broadcasts coeff_tensor to match the input shape.
5442
coeff_tensor_wrapper = self.define_tensor(
5543
coeff_node,
5644
node,

backends/qualcomm/tests/models.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,16 @@ def forward(self, x):
674674
return torch.flip(x, self.dims)
675675

676676

677+
class Conv2dLeakyReLU(torch.nn.Module):
    """A 3x3, padding-1 Conv2d (32 -> 32 channels) followed by LeakyReLU."""

    def __init__(self, negative_slope=0.01):
        """Create the conv layer, then the LeakyReLU with `negative_slope`."""
        super().__init__()
        # The conv is built first so parameter initialisation draws from the
        # RNG in the same position as before.
        self.conv = torch.nn.Conv2d(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            padding=1,
        )
        self.leaky_relu = torch.nn.LeakyReLU(negative_slope=negative_slope)

    def forward(self, x):
        """Return LeakyReLU(conv(x))."""
        conv_out = self.conv(x)
        return self.leaky_relu(conv_out)
685+
686+
677687
class Conv2dMaxPool2d(torch.nn.Module):
678688
def __init__(self):
679689
super().__init__()
@@ -690,6 +700,16 @@ def forward(self, x):
690700
return self.pool(self.conv(x))
691701

692702

703+
class Conv2dReLU(torch.nn.Module):
    """A 3x3, padding-1 Conv2d (3 -> 32 channels) followed by ReLU."""

    def __init__(self):
        """Create the conv layer and the ReLU activation."""
        super().__init__()
        self.conv = torch.nn.Conv2d(
            in_channels=3,
            out_channels=32,
            kernel_size=3,
            padding=1,
        )
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return ReLU(conv(x))."""
        conv_out = self.conv(x)
        return self.relu(conv_out)
711+
712+
693713
class Conv2dSequential(torch.nn.Module):
694714
def __init__(self, bias=True, channel_last=False):
695715
super().__init__()
@@ -1480,6 +1500,16 @@ def forward(self, x):
14801500
return self.linear(x)
14811501

14821502

1503+
class LinearLeakyReLU(torch.nn.Module):
    """A Linear layer (32 -> 32 features) followed by LeakyReLU."""

    def __init__(self, negative_slope=0.01):
        """Create the linear layer, then the LeakyReLU with `negative_slope`."""
        super().__init__()
        self.linear = torch.nn.Linear(in_features=32, out_features=32)
        self.leaky_relu = torch.nn.LeakyReLU(negative_slope=negative_slope)

    def forward(self, x):
        """Return LeakyReLU(linear(x))."""
        linear_out = self.linear(x)
        return self.leaky_relu(linear_out)
1511+
1512+
14831513
class LinearNonConstantWeight(torch.nn.Module):
14841514
def __init__(self):
14851515
super().__init__()

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4774,6 +4774,98 @@ def test_qnn_backend_conv2d_max_pool2d(self):
47744774
module = self.get_qdq_module(module, sample_input)
47754775
self.lower_module_and_test_output(module, sample_input)
47764776

4777+
def test_qnn_backend_conv2d_leaky_relu_fusion(self):
    """Inspect the HTP op trace of a lowered Conv2d + LeakyReLU module.

    Skipped when `self.enable_x86_64` is set. The quantized module is lowered
    with `profile_level=3`, an op trace is generated, and the op names listed
    in every QHAS file are collected. The test passes when some op name
    contains "ConvLayer" and no lower-cased op name contains "prelu.opt".
    """
    if self.enable_x86_64:
        self.skipTest(
            "At the moment, testing is only being conducted on the device."
        )

    # Seed -> module construction -> input sampling: this order fixes the
    # random weights and the random input, so it must not be rearranged.
    torch.manual_seed(8)
    float_module = Conv2dLeakyReLU()  # noqa: F405
    sample_input = (torch.randn(1, 32, 6, 2),)
    module = self.get_qdq_module(float_module, sample_input)

    htp_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_spec = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.soc_model],
        backend_options=htp_options,
        profile_level=3,
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        lowered = to_edge_transform_and_lower_to_qnn(
            module, sample_input, compiler_spec
        )
        exec_prog_mgr = lowered.to_executorch()

        pte_path = f"{tmp_dir}/model.pte"
        with open(pte_path, "wb") as pte_file:
            exec_prog_mgr.write_to_file(pte_file)

        adb = self.get_adb_tool(pte_path)
        binaries_trace = generate_optrace(
            tmp_dir,
            self.chipset_table[TestQNN.soc_model],
            adb,
            pte_path,
            [sample_input],
        )

        # Each trace entry is a pair whose second element is the QHAS path.
        htp_ops = []
        for _, qhas in binaries_trace.values():
            with open(qhas, "r") as qhas_file:
                summary = json.load(qhas_file)
                htp_ops.extend(
                    row["op"] for row in summary["data"]["htp_op_types"]["data"]
                )

        conv_found = any("ConvLayer" in op_name for op_name in htp_ops)
        prelu_found = any("prelu.opt" in op_name.lower() for op_name in htp_ops)
        self.assertTrue(conv_found, f"Expected Conv op in HTP ops, got: {htp_ops}")
        self.assertFalse(
            prelu_found,
            f"Unexpected PReLU op in HTP ops (LeakyReLU lowered to PReLU), got: {htp_ops}",
        )
4820+
4821+
def test_qnn_backend_conv2d_relu_fusion(self):
    """Inspect the HTP op trace of a lowered Conv2d + ReLU module.

    Skipped when `self.enable_x86_64` is set. The quantized module is lowered
    with `profile_level=3`, an op trace is generated, and the op names listed
    in every QHAS file are collected. The test passes when some op name
    contains "ConvLayer" and no op looks like a standalone ReLU (its
    lower-cased name is "q::relu" / "q::relu.opt", or contains "relu" but
    not "conv").
    """
    if self.enable_x86_64:
        self.skipTest(
            "At the moment, testing is only being conducted on the device."
        )

    # Seed -> module construction -> input sampling: this order fixes the
    # random weights and the random input, so it must not be rearranged.
    torch.manual_seed(8)
    float_module = Conv2dReLU()  # noqa: F405
    sample_input = (torch.randn(1, 3, 28, 28),)
    module = self.get_qdq_module(float_module, sample_input)

    htp_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_spec = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.soc_model],
        backend_options=htp_options,
        profile_level=3,
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        lowered = to_edge_transform_and_lower_to_qnn(
            module, sample_input, compiler_spec
        )
        exec_prog_mgr = lowered.to_executorch()

        pte_path = f"{tmp_dir}/model.pte"
        with open(pte_path, "wb") as pte_file:
            exec_prog_mgr.write_to_file(pte_file)

        adb = self.get_adb_tool(pte_path)
        binaries_trace = generate_optrace(
            tmp_dir,
            self.chipset_table[TestQNN.soc_model],
            adb,
            pte_path,
            [sample_input],
        )

        # Each trace entry is a pair whose second element is the QHAS path.
        htp_ops = []
        for _, qhas in binaries_trace.values():
            with open(qhas, "r") as qhas_file:
                summary = json.load(qhas_file)
                htp_ops.extend(
                    row["op"] for row in summary["data"]["htp_op_types"]["data"]
                )

        lowered_names = [op_name.lower() for op_name in htp_ops]
        standalone_relu_found = any(
            name in ("q::relu", "q::relu.opt")
            or ("relu" in name and "conv" not in name)
            for name in lowered_names
        )
        conv_found = any("ConvLayer" in op_name for op_name in htp_ops)
        self.assertTrue(conv_found, f"Expected Conv op in HTP ops, got: {htp_ops}")
        self.assertFalse(
            standalone_relu_found,
            f"Unexpected standalone ReLU op in HTP ops, got: {htp_ops}",
        )
4868+
47774869
def test_qnn_backend_conv2d_slice_copy(self):
47784870
module = Conv2dSliceCopy() # noqa: F405
47794871
sample_input = (torch.randn([2, 1, 3, 3]),)
@@ -4834,6 +4926,50 @@ def test_qnn_backend_einsum_outer_product_relu(self):
48344926
module = self.get_qdq_module(module, sample_input)
48354927
self.lower_module_and_test_output(module, sample_input)
48364928

4929+
def test_qnn_backend_linear_leaky_relu_fusion(self):
    """Inspect the HTP op trace of a lowered Linear + LeakyReLU module.

    Skipped when `self.enable_x86_64` is set. The quantized module is lowered
    with `profile_level=3`, an op trace is generated, and the op names listed
    in every QHAS file are collected. The test passes when some op name
    contains "ConvLayer" and no lower-cased op name contains "prelu.opt".
    """
    if self.enable_x86_64:
        self.skipTest(
            "At the moment, testing is only being conducted on the device."
        )

    # Seed -> module construction -> input sampling: this order fixes the
    # random weights and the random input, so it must not be rearranged.
    torch.manual_seed(8)
    float_module = LinearLeakyReLU()  # noqa: F405
    sample_input = (torch.randn(1, 6, 2, 32),)
    module = self.get_qdq_module(float_module, sample_input)

    htp_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_spec = generate_qnn_executorch_compiler_spec(
        soc_model=self.chipset_table[TestQNN.soc_model],
        backend_options=htp_options,
        profile_level=3,
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        lowered = to_edge_transform_and_lower_to_qnn(
            module, sample_input, compiler_spec
        )
        exec_prog_mgr = lowered.to_executorch()

        pte_path = f"{tmp_dir}/model.pte"
        with open(pte_path, "wb") as pte_file:
            exec_prog_mgr.write_to_file(pte_file)

        adb = self.get_adb_tool(pte_path)
        binaries_trace = generate_optrace(
            tmp_dir,
            self.chipset_table[TestQNN.soc_model],
            adb,
            pte_path,
            [sample_input],
        )

        # Each trace entry is a pair whose second element is the QHAS path.
        htp_ops = []
        for _, qhas in binaries_trace.values():
            with open(qhas, "r") as qhas_file:
                summary = json.load(qhas_file)
                htp_ops.extend(
                    row["op"] for row in summary["data"]["htp_op_types"]["data"]
                )

        # The check looks for a "ConvLayer" op even though the module is Linear.
        conv_found = any("ConvLayer" in op_name for op_name in htp_ops)
        prelu_found = any("prelu.opt" in op_name.lower() for op_name in htp_ops)
        self.assertTrue(conv_found, f"Expected Conv op in HTP ops, got: {htp_ops}")
        self.assertFalse(
            prelu_found,
            f"Unexpected PReLU op in HTP ops (LeakyReLU lowered to PReLU), got: {htp_ops}",
        )
4972+
48374973
@unittest.skipIf(is_qnn_sdk_version_less_than("2.35"), "UT pass after QNN 2.35")
48384974
def test_qnn_backend_masked_softmax(self):
48394975
if self.enable_x86_64:

0 commit comments

Comments
 (0)