Arm backend: Add MAX_POOL2D tosa dialect op (#18970)

AdrianLundell · web-flow · commit 04955b257708 · 2026-04-17T13:12:29.000+02:00
Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
@@ -157,6 +157,7 @@
 from .rewrite_inplace_arithmetic_pass import RewriteInplaceArithmeticPass  # noqa
 from .rewrite_le_lt_to_ge_gt_pass import RewriteLeLtToGeGtPass  # noqa
 from .rewrite_matmul import RewriteMatmulPass  # noqa
+from .rewrite_max_pool2d_pass import RewriteMaxPool2dPass  # noqa
 from .rewrite_pad import RewritePadPass  # noqa
 from .rewrite_slice import RewriteSlicePass  # noqa
 from .rewrite_upsample import RewriteUpsamplePass  # noqa
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -134,6 +134,7 @@
     RewriteInplaceArithmeticPass,
     RewriteLeLtToGeGtPass,
     RewriteMatmulPass,
+    RewriteMaxPool2dPass,
     RewritePadPass,
     RewriteSlicePass,
     RewriteUpsamplePass,
@@ -526,6 +527,7 @@ def _tosa_pipeline(
         self.add_passes(
             [
                 RewriteUpsamplePass(),
+                RewriteMaxPool2dPass(),
                 RewriteConvPass(exported_program),
                 RewriteMatmulPass(),
                 RewritePadPass(),
diff --git a/backends/arm/_passes/rewrite_max_pool2d_pass.py b/backends/arm/_passes/rewrite_max_pool2d_pass.py
@@ -0,0 +1,65 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Set, Type
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm.operators.operator_validation_utils import (
+    adjust_pooling_pad_if_needed,
+)
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+edge_max_pool2d_ops = (exir_ops.edge.aten.max_pool2d.default,)
+
+
+def _to_2tuple(value):
+    if isinstance(value, int):
+        return (value, value)
+    if len(value) == 1:
+        return (value[0], value[0])
+    return tuple(value)
+
+
+class RewriteMaxPool2dPass(ArmPass):
+    """Rewrite max_pool2d ops to TOSA MAX_POOL2D."""
+
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in edge_max_pool2d_ops:
+            return super().call_operator(op, args, kwargs, meta)
+
+        x = args[0]
+        kernel = _to_2tuple(args[1])
+
+        if len(args) > 2 and args[2] is not None and len(args[2]) > 0:
+            stride = _to_2tuple(args[2])
+        else:
+            stride = kernel
+
+        padding = _to_2tuple(args[3]) if len(args) > 3 else (0, 0)
+        dilation = _to_2tuple(args[4]) if len(args) > 4 else (1, 1)
+        ceil_mode = args[5] if len(args) > 5 else False
+
+        if dilation != (1, 1):
+            return super().call_operator(op, args, kwargs, meta)
+
+        # TOSA MAX_POOL2D pad order is [top, bottom, left, right]
+        pad = [padding[0], padding[0], padding[1], padding[1]]
+        pad[1] = adjust_pooling_pad_if_needed(
+            x.data.shape[2], kernel[0], stride[0], pad[1], ceil_mode
+        )
+        pad[3] = adjust_pooling_pad_if_needed(
+            x.data.shape[3], kernel[1], stride[1], pad[3], ceil_mode
+        )
+
+        return super().call_operator(
+            exir_ops.backend.tosa.MAX_POOL2D.default,
+            (x, list(kernel), list(stride), pad),
+            {},
+            meta,
+            updated=True,
+        )
diff --git a/backends/arm/_passes/size_adjust_input_pass.py b/backends/arm/_passes/size_adjust_input_pass.py
@@ -11,6 +11,7 @@
     expand_around_channel,
 )
 from executorch.backends.arm._passes.rewrite_conv_pass import RewriteConvPass
+from executorch.backends.arm._passes.rewrite_max_pool2d_pass import RewriteMaxPool2dPass
 from executorch.backends.arm.tosa.specification import get_context_shape_env
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -201,6 +202,7 @@ class SizeAdjustInputPass(ArmPass):
 
     _passes_required_after: Set[Type[ExportPass]] = {
         RewriteConvPass,
+        RewriteMaxPool2dPass,
     }
 
     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
@@ -32,7 +32,6 @@
     op_gt,
     op_log,
     op_logical_not,
-    op_max_pool2d,
     op_maximum,
     op_minimum,
     op_mul,
@@ -55,6 +54,7 @@
     op_tosa_depthwise_conv2d,
     op_tosa_gather,
     op_tosa_matmul,
+    op_tosa_max_pool2d,
     op_tosa_pad,
     op_tosa_rescale,
     op_tosa_resize,
diff --git a/backends/arm/operators/op_tosa_max_pool2d.py b/backends/arm/operators/op_tosa_max_pool2d.py
@@ -14,7 +14,6 @@
     register_node_visitor,
 )
 from executorch.backends.arm.operators.operator_validation_utils import (
-    adjust_pooling_pad_if_needed,
     validate_num_inputs,
     validate_same_dtype,
     validate_valid_dtype,
@@ -24,10 +23,9 @@
 
 @register_node_visitor
 class MaxPool2dVisitor(NodeVisitor):
-    target = "aten.max_pool2d.default"
+    """Visitor for lowering TOSA MAX_POOL2D operator."""
 
-    def __init__(self, *args):
-        super().__init__(*args)
+    target = "tosa.MAX_POOL2D.default"
 
     def define_node(
         self,
@@ -36,59 +34,26 @@ def define_node(
         inputs: List[TosaArg],
         output: TosaArg,
     ) -> None:
-        validate_num_inputs(self.target, inputs, [3, 4, 5, 6])
+        validate_num_inputs(self.target, inputs, [4])
         validate_same_dtype(self.target, [inputs[0], output], ts)
+
+        input_tensor, kernel, stride, pad = inputs
+
         supported_dtypes = [ts.DType.INT8, ts.DType.FP16, ts.DType.FP32, ts.DType.BF16]
         if self.tosa_spec.support_extension("int16"):
             supported_dtypes.append(ts.DType.INT16)
         validate_valid_dtype(
             self.target,
-            [inputs[0], output],
+            [input_tensor, output],
             supported_dtypes,
             self.tosa_spec,
         )
 
-        input_tensor = inputs[0]
-        kernel_size = inputs[1].special
-        stride = inputs[2].special
-
-        if len(inputs) == 6:
-            ceil_mode = bool(inputs[5].number)
-        else:
-            ceil_mode = False
-
-        try:
-            pad_size_list = inputs[3].special
-            pad_size_list = [
-                pad_size_list[0],
-                pad_size_list[0],
-                pad_size_list[1],
-                pad_size_list[1],
-            ]
-        except (IndexError, AttributeError):
-            pad_size_list = [0, 0, 0, 0]
-
-        # Adjust the padding as necessary
-        pad_size_list[1] = adjust_pooling_pad_if_needed(
-            input_tensor.shape[2],
-            kernel_size[0],
-            stride[0],
-            pad_size_list[1],
-            ceil_mode,
-        )
-        pad_size_list[3] = adjust_pooling_pad_if_needed(
-            input_tensor.shape[3],
-            kernel_size[1],
-            stride[1],
-            pad_size_list[3],
-            ceil_mode,
-        )
-
         attr = ts.TosaSerializerAttribute()
         attr.MaxPool2dAttribute(
-            kernel=kernel_size,
-            stride=stride,
-            pad=pad_size_list,
+            kernel=kernel.special,
+            stride=stride.special,
+            pad=pad.special,
             nan_mode=ts.NanPropagationMode.PROPAGATE,
         )
 
diff --git a/backends/arm/test/passes/test_rewrite_max_pool2d_pass.py b/backends/arm/test/passes/test_rewrite_max_pool2d_pass.py
@@ -0,0 +1,69 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import cast, Dict, Protocol, Tuple
+
+import torch
+from executorch.backends.arm._passes.remove_getitem_pass import RemoveGetItemPass
+from executorch.backends.arm._passes.rewrite_max_pool2d_pass import RewriteMaxPool2dPass
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
+
+input_t = Tuple[torch.Tensor]
+
+
+class ModuleWithInputs(Protocol):
+    def get_inputs(self) -> input_t: ...
+
+
+class MaxPool2dWithStride(torch.nn.Module):
+    def get_inputs(self) -> input_t:
+        return (torch.rand(1, 3, 8, 8),)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.nn.functional.max_pool2d(x, kernel_size=2, stride=2)
+
+
+class MaxPool2dWithoutStride(torch.nn.Module):
+    def get_inputs(self) -> input_t:
+        return (torch.rand(1, 3, 8, 8),)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.nn.functional.max_pool2d(x, kernel_size=3)
+
+
+class MaxPool2dListKernel(torch.nn.Module):
+    def get_inputs(self) -> input_t:
+        return (torch.rand(1, 3, 8, 8),)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.nn.functional.max_pool2d(x, kernel_size=[2, 3])
+
+
+modules: Dict[str, ModuleWithInputs] = {
+    "max_pool2d_with_stride": MaxPool2dWithStride(),
+    "max_pool2d_without_stride": MaxPool2dWithoutStride(),
+    "max_pool2d_list_kernel": MaxPool2dListKernel(),
+}
+
+
+@common.parametrize("module", modules)
+def test_rewrite_max_pool2d_tosa(module: ModuleWithInputs) -> None:
+    nn_module = cast(torch.nn.Module, module)
+    pipeline = PassPipeline[input_t](
+        nn_module,
+        module.get_inputs(),
+        ops_before_pass={
+            "executorch_exir_dialects_edge__ops_aten_max_pool2d_with_indices_default": 1,
+        },
+        ops_after_pass={
+            "executorch_exir_dialects_backend__ops_tosa_MAX_POOL2D_default": 1,
+        },
+        pass_list=[RemoveGetItemPass, RewriteMaxPool2dPass],
+    )
+    pipeline.pop_stage(
+        "run_method_and_compare_outputs"
+    )  # Cannot run aten graph with tosa dialect ops
+    pipeline.run()
diff --git a/backends/arm/tosa/dialect/__init__.py b/backends/arm/tosa/dialect/__init__.py
@@ -10,6 +10,7 @@
     depthwise_conv2d,
     gather,
     matmul,
+    max_pool2d,
     pad,
     rescale,
     resize,
diff --git a/backends/arm/tosa/dialect/ops/max_pool2d.py b/backends/arm/tosa/dialect/ops/max_pool2d.py
@@ -0,0 +1,75 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Union
+
+import torch
+from executorch.backends.arm.tosa.dialect.lib import TosaValueError
+from executorch.backends.arm.tosa.dialect.ops_registration import register_fake_tosa_op
+from executorch.backends.arm.tosa.specification import (
+    get_context_spec,
+    TosaSpecification,
+)
+
+
+@register_fake_tosa_op(
+    "MAX_POOL2D(Tensor input, int[2] kernel, int[2] stride, SymInt[4] pad) -> Tensor",
+    TosaSpecification.all_versions_and_profiles(),
+)
+def MAX_POOL2D(
+    x: torch.Tensor,
+    kernel: List[int],
+    stride: List[int],
+    pad: List[Union[int, torch.SymInt]],
+) -> torch.Tensor:
+    """Compute output meta for a TOSA MAX_POOL2D operation."""
+    tosa_spec = get_context_spec()
+
+    supported_int_types = [torch.int8]
+    supported_float_types = [
+        torch.float16,
+        torch.float32,
+    ]
+    if tosa_spec.support_extension("bf16"):
+        supported_float_types.append(torch.bfloat16)
+    if tosa_spec.support_extension("int16"):
+        supported_int_types.append(torch.int16)
+
+    if x.dtype in supported_int_types:
+        if not tosa_spec.support_integer():
+            raise TosaValueError(
+                f"TOSA spec {tosa_spec} doesn't support integer pools", op="MAX_POOL2D"
+            )
+    elif x.dtype in supported_float_types:
+        if not tosa_spec.support_float():
+            raise TosaValueError(
+                f"TOSA spec {tosa_spec} doesn't support float pools", op="MAX_POOL2D"
+            )
+    else:
+        raise TosaValueError(
+            f"Unsupported input dtype {x.dtype} for TOSA MAX_POOL2D", op="MAX_POOL2D"
+        )
+
+    if x.dim() != 4:
+        raise TosaValueError(
+            f"MAX_POOL2D requires a 4D tensor, got {x.dim()}D", op="MAX_POOL2D"
+        )
+
+    if len(kernel) != 2 or len(stride) != 2 or len(pad) != 4:
+        raise TosaValueError(
+            f"MAX_POOL2D expects kernel of length 2, stride of length 2, pad of "
+            f"length 4; got kernel={kernel}, stride={stride}, pad={pad}",
+            op="MAX_POOL2D",
+        )
+
+    n, c, h, w = x.shape
+    k_h, k_w = kernel
+    s_h, s_w = stride
+    # TOSA MAX_POOL2D pad order is [top, bottom, left, right]
+    p_top, p_bot, p_left, p_right = pad
+
+    h_out = (h + p_top + p_bot - k_h) // s_h + 1
+    w_out = (w + p_left + p_right - k_w) // s_w + 1
+    return torch.empty(size=[n, c, h_out, w_out], dtype=x.dtype)

Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,7 @@`
`11`	`11`	`expand_around_channel,`
`12`	`12`	`)`
`13`	`13`	`from executorch.backends.arm._passes.rewrite_conv_pass import RewriteConvPass`
	`14`	`+from executorch.backends.arm._passes.rewrite_max_pool2d_pass import RewriteMaxPool2dPass`
`14`	`15`	`from executorch.backends.arm.tosa.specification import get_context_shape_env`
`15`	`16`	`from executorch.exir.dialects._ops import ops as exir_ops`
`16`	`17`	`from executorch.exir.pass_base import ExportPass, PassResult`
`@@ -201,6 +202,7 @@ class SizeAdjustInputPass(ArmPass):`
`201`	`202`
`202`	`203`	`_passes_required_after: Set[Type[ExportPass]] = {`
`203`	`204`	`RewriteConvPass,`
	`205`	`+ RewriteMaxPool2dPass,`
`204`	`206`	`}`
`205`	`207`
`206`	`208`	`def call(self, graph_module: torch.fx.GraphModule) -> PassResult:`