Revert "Revert "Arm backend: Run adaptive_avg_pool2d before quantization"" (#17831)

rascani · web-flow · commit f689edf0d00c · 2026-03-16T15:18:15.000-07:00
Reverts #17595 to essentially reland #17494. Original summary from @gggekov: To run mobilenet_v2 with good performance on Ethos-U55, we need to export the model in channels-last memory format. If we export in channels-first (the default behaviour), we pay a hefty performance penalty because the Ethos-U55 hardware is not efficient at performing Transpose operations (see details in #17157). The adaptive_avg_pool2d operator, which is part of mv2, is traced differently by ExecuTorch depending on whether the model was exported in channels-first (the operator is not decomposed) or in channels-last (the operator is decomposed by ExecuTorch in to_edge). To work around that, we add adaptive_avg_pool2d to the transform_for_annotation pipeline in order to decompose the operator before quantization.
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -470,6 +470,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
                 DecomposeLeakyReLUPass(tfa_pass=True),
                 DecomposeLinalgVectorNormPass(tfa_pass=True),
                 DecomposeSqrtPass(tfa_pass=True),
+                DecomposeAdaptiveAvgPool2dPass(tfa_pass=True),
                 DecomposeAvgPool2dPass(tfa_pass=True),
                 DecomposeSoftmaxUnstablePass(tfa_pass=True),
                 DecomposeSoftmaxPass(
diff --git a/backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py b/backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py
@@ -49,7 +49,7 @@ class DecomposeAdaptiveAvgPool2dPass(ArmPass):
     _passes_required_after: Set[Type[ExportPass]] = {DecomposeAvgPool2dPass}
 
     def call_operator(self, op, args, kwargs, meta, updated=False):
-        if op not in (edge_ops + aten_ops):
+        if op not in (edge_ops + aten_ops) or not self.allowed_to_transform(meta):
             return super().call_operator(op, args, kwargs, meta, updated)
 
         avg_pool2d_op, slice_op, cat_op = _get_decomposition(op)
diff --git a/backends/arm/ethosu/backend.py b/backends/arm/ethosu/backend.py
@@ -1,4 +1,4 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -105,9 +105,10 @@ def test_mv2_tosa_INT(per_channel_quantization):
 @common.XfailIfNoCorstone300
 @common.parametrize("per_channel_quantization", quant_test_data)
 def test_mv2_u55_INT(per_channel_quantization):
+    input_tensor = model_inputs[0].to(memory_format=torch.channels_last)
     pipeline = EthosU55PipelineINT[input_t](
         mv2,
-        model_inputs,
+        (input_tensor,),
         aten_ops=[],
         exir_ops=[],
         use_to_edge_transform_and_lower=True,
@@ -122,9 +123,10 @@ def test_mv2_u55_INT(per_channel_quantization):
 @common.XfailIfNoCorstone320
 @common.parametrize("per_channel_quantization", quant_test_data)
 def test_mv2_u85_INT(per_channel_quantization):
+    input_tensor = model_inputs[0].to(memory_format=torch.channels_last)
     pipeline = EthosU85PipelineINT[input_t](
         mv2,
-        model_inputs,
+        (input_tensor,),
         aten_ops=[],
         exir_ops=[],
         use_to_edge_transform_and_lower=True,
diff --git a/backends/arm/test/ops/test_avg_pool2d.py b/backends/arm/test/ops/test_avg_pool2d.py
@@ -122,9 +122,11 @@ def forward(self, x: torch.Tensor):
         AvgPool2d(3, (1, 3), 1, count_include_pad=False),
         (torch.rand(1, 16, 54, 54),),
     ),
-    "becomes_mean_rank3": lambda: (BecomesMeanInToEdge(), (torch.rand(2, 8, 8),)),
-    "becomes_mean_rank4": lambda: (BecomesMeanInToEdge(), (torch.rand(2, 2, 8, 8),)),
-    "becomes_mean_rank5": lambda: (BecomesMeanInToEdge(), (torch.rand(2, 2, 8, 8),)),
+    "becomes_mean_rank4": lambda: (BecomesMeanInToEdge(), (torch.rand(1, 2, 8, 8),)),
+    "channels_last_adaptive_avg_pool": lambda: (
+        BecomesMeanInToEdge(),
+        (torch.randn(1, 1280, 7, 7).to(memory_format=torch.channels_last),),
+    ),
 }
 
 test_modules_bf16 = {
diff --git a/backends/arm/test/ops/test_mean_dim.py b/backends/arm/test/ops/test_mean_dim.py
@@ -53,8 +53,8 @@ def test_adaptive_avg_pool2d_tosa_INT(test_data):
     TosaPipelineINT[input_t](
         AdaptiveAveragePool2d(),
         test_data(),
-        AdaptiveAveragePool2d.aten_op,
-        AdaptiveAveragePool2d.exir_op,
+        [],
+        [],
         symmetric_io_quantization=True,
     ).run()
 
@@ -65,8 +65,8 @@ def test_adaptive_avg_pool2d_u55_INT(test_data):
     EthosU55PipelineINT[input_t](
         AdaptiveAveragePool2d(),
         test_data(),
-        AdaptiveAveragePool2d.aten_op,
-        AdaptiveAveragePool2d.exir_op,
+        [],
+        [],
         symmetric_io_quantization=True,
     ).run()
 
@@ -77,8 +77,8 @@ def test_adaptive_avg_pool2d_u85_INT(test_data):
     EthosU85PipelineINT[input_t](
         AdaptiveAveragePool2d(),
         test_data(),
-        AdaptiveAveragePool2d.aten_op,
-        AdaptiveAveragePool2d.exir_op,
+        [],
+        [],
         symmetric_io_quantization=True,
     ).run()
 
@@ -102,8 +102,8 @@ def test_adaptive_avg_pool2d_vgf_quant(test_data):
     pipeline = VgfPipeline[input_t](
         AdaptiveAveragePool2d(),
         test_data(),
-        AdaptiveAveragePool2d.aten_op,
-        AdaptiveAveragePool2d.exir_op,
+        [],
+        [],
         symmetric_io_quantization=True,
         quantize=True,
     )
diff --git a/backends/arm/test/quantizer/test_selective_quantization.py b/backends/arm/test/quantizer/test_selective_quantization.py
@@ -1,4 +1,4 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -172,8 +172,8 @@ def test_mv3_selective_quant_float32_tosa_INT():
     inputs = (normalize(torch.randn(1, 3, 224, 224)),)
 
     quantization_annotations = {
-        "aten.adaptive_avg_pool2d.default": {
-            None: 1,
+        "aten.conv2d.default": {
+            None: 14,
         },
     }
 
@@ -182,12 +182,11 @@ def test_mv3_selective_quant_float32_tosa_INT():
         inputs,
         quantizer=get_selective_quantizer_by_module_name(
             {
-                "features.11.block.2.avgpool": None,
+                "conv2d_3": None,
             }
         ),
         qspecs=quantization_annotations,
     )
-
     pipeline.run()
 
 

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-# Copyright 2025 Arm Limited and/or its affiliates.`
	`1`	`+# Copyright 2025-2026 Arm Limited and/or its affiliates.`
`2`	`2`	`#`
`3`	`3`	`# This source code is licensed under the BSD-style license found in the`
`4`	`4`	`# LICENSE file in the root directory of this source tree.`