Skip to content

Commit a37a757

Browse files
committed
Arm backend: Run adaptive_avg_pool2d before quantization
To run mobilenet_v2 with good performance on Ethos-U55, we need to export the model in channels_last. If we export in channels_first (the default behaviour), we pay a hefty performance penalty because the Ethos-U55 hardware is not efficient at performing Transpose operations (see details in #17157). The adaptive_avg_pool2d operator, which is part of mv2, is traced differently by ExecuTorch depending on the memory format used at export: in channels_first the operator is not decomposed, whereas in channels_last it is decomposed by ExecuTorch in to_edge. To work around that, we add the adaptive_avg_pool2d decomposition (DecomposeAdaptiveAvgPool2dPass) to the transform_for_annotation pipeline so that the operator is decomposed before quantization in both cases. Signed-off-by: George Gekov <george.gekov@arm.com> Change-Id: I3e98a2d52f6d0e7c79f82188a5e6c4eb6a63448b
1 parent a24d3e7 commit a37a757

7 files changed

Lines changed: 24 additions & 20 deletions

File tree

backends/arm/_passes/arm_pass_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
446446
DecomposeLeakyReLUPass(tfa_pass=True),
447447
DecomposeLinalgVectorNormPass(tfa_pass=True),
448448
DecomposeSqrtPass(tfa_pass=True),
449+
DecomposeAdaptiveAvgPool2dPass(tfa_pass=True),
449450
DecomposeAvgPool2dPass(tfa_pass=True),
450451
DecomposeSoftmaxUnstablePass(tfa_pass=True),
451452
DecomposeSoftmaxPass(tfa_pass=True),

backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class DecomposeAdaptiveAvgPool2dPass(ArmPass):
4949
_passes_required_after: Set[Type[ExportPass]] = {DecomposeAvgPool2dPass}
5050

5151
def call_operator(self, op, args, kwargs, meta, updated=False):
52-
if op not in (edge_ops + aten_ops):
52+
if op not in (edge_ops + aten_ops) or not self.allowed_to_transform(meta):
5353
return super().call_operator(op, args, kwargs, meta, updated)
5454

5555
avg_pool2d_op, slice_op, cat_op = _get_decomposition(op)

backends/arm/ethosu/backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 Arm Limited and/or its affiliates.
1+
# Copyright 2025-2026 Arm Limited and/or its affiliates.
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.

backends/arm/test/models/test_mobilenet_v2_arm.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,10 @@ def test_mv2_tosa_INT(per_channel_quantization):
105105
@common.XfailIfNoCorstone300
106106
@common.parametrize("per_channel_quantization", quant_test_data)
107107
def test_mv2_u55_INT(per_channel_quantization):
108+
input_tensor = model_inputs[0].to(memory_format=torch.channels_last)
108109
pipeline = EthosU55PipelineINT[input_t](
109110
mv2,
110-
model_inputs,
111+
(input_tensor,),
111112
aten_ops=[],
112113
exir_ops=[],
113114
use_to_edge_transform_and_lower=True,
@@ -122,9 +123,10 @@ def test_mv2_u55_INT(per_channel_quantization):
122123
@common.XfailIfNoCorstone320
123124
@common.parametrize("per_channel_quantization", quant_test_data)
124125
def test_mv2_u85_INT(per_channel_quantization):
126+
input_tensor = model_inputs[0].to(memory_format=torch.channels_last)
125127
pipeline = EthosU85PipelineINT[input_t](
126128
mv2,
127-
model_inputs,
129+
(input_tensor,),
128130
aten_ops=[],
129131
exir_ops=[],
130132
use_to_edge_transform_and_lower=True,

backends/arm/test/ops/test_avg_pool2d.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,11 @@ def forward(self, x: torch.Tensor):
119119
AvgPool2d(3, (1, 3), 1, count_include_pad=False),
120120
(torch.rand(1, 16, 54, 54),),
121121
),
122-
"becomes_mean_rank3": lambda: (BecomesMeanInToEdge(), (torch.rand(2, 8, 8),)),
123-
"becomes_mean_rank4": lambda: (BecomesMeanInToEdge(), (torch.rand(2, 2, 8, 8),)),
124-
"becomes_mean_rank5": lambda: (BecomesMeanInToEdge(), (torch.rand(2, 2, 8, 8),)),
122+
"becomes_mean_rank4": lambda: (BecomesMeanInToEdge(), (torch.rand(1, 2, 8, 8),)),
123+
"channels_last_adaptive_avg_pool": lambda: (
124+
BecomesMeanInToEdge(),
125+
(torch.randn(1, 1280, 7, 7).to(memory_format=torch.channels_last),),
126+
),
125127
}
126128

127129
test_modules_bf16 = {

backends/arm/test/ops/test_mean_dim.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def test_adaptive_avg_pool2d_tosa_INT(test_data):
5353
TosaPipelineINT[input_t](
5454
AdaptiveAveragePool2d(),
5555
test_data(),
56-
AdaptiveAveragePool2d.aten_op,
57-
AdaptiveAveragePool2d.exir_op,
56+
[],
57+
[],
5858
symmetric_io_quantization=True,
5959
).run()
6060

@@ -65,8 +65,8 @@ def test_adaptive_avg_pool2d_u55_INT(test_data):
6565
EthosU55PipelineINT[input_t](
6666
AdaptiveAveragePool2d(),
6767
test_data(),
68-
AdaptiveAveragePool2d.aten_op,
69-
AdaptiveAveragePool2d.exir_op,
68+
[],
69+
[],
7070
symmetric_io_quantization=True,
7171
).run()
7272

@@ -77,8 +77,8 @@ def test_adaptive_avg_pool2d_u85_INT(test_data):
7777
EthosU85PipelineINT[input_t](
7878
AdaptiveAveragePool2d(),
7979
test_data(),
80-
AdaptiveAveragePool2d.aten_op,
81-
AdaptiveAveragePool2d.exir_op,
80+
[],
81+
[],
8282
symmetric_io_quantization=True,
8383
).run()
8484

@@ -102,8 +102,8 @@ def test_adaptive_avg_pool2d_vgf_quant(test_data):
102102
pipeline = VgfPipeline[input_t](
103103
AdaptiveAveragePool2d(),
104104
test_data(),
105-
AdaptiveAveragePool2d.aten_op,
106-
AdaptiveAveragePool2d.exir_op,
105+
[],
106+
[],
107107
symmetric_io_quantization=True,
108108
quantize=True,
109109
)

backends/arm/test/quantizer/test_selective_quantization.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 Arm Limited and/or its affiliates.
1+
# Copyright 2025-2026 Arm Limited and/or its affiliates.
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -172,8 +172,8 @@ def test_mv3_selective_quant_float32_tosa_INT():
172172
inputs = (normalize(torch.randn(1, 3, 224, 224)),)
173173

174174
quantization_annotations = {
175-
"aten.adaptive_avg_pool2d.default": {
176-
None: 1,
175+
"aten.conv2d.default": {
176+
None: 14,
177177
},
178178
}
179179

@@ -182,12 +182,11 @@ def test_mv3_selective_quant_float32_tosa_INT():
182182
inputs,
183183
quantizer=get_selective_quantizer_by_module_name(
184184
{
185-
"features.11.block.2.avgpool": None,
185+
"conv2d_3": None,
186186
}
187187
),
188188
qspecs=quantization_annotations,
189189
)
190-
191190
pipeline.run()
192191

193192

0 commit comments

Comments
 (0)