NXP backend: Enable aten.upsample_nearest2d with new Neutron flow. (pytorch#19796)

MartinPavella · web-flow · commit c72bc872a652 · 2026-05-29T09:00:32.000+02:00
### Summary NXP backend: Enable `aten.upsample_nearest2d` with new Neutron flow. ### Test plan Unit tests provided. cc @robert-kalmar @JakeStevens @digantdesai @rascani
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_nearest2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/upsample_nearest2d_converter.py
@@ -4,24 +4,51 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+import torch
 
 from executorch.backends.nxp.backend.data_format import DataFormat, NXP_NODE_FORMAT
 from executorch.backends.nxp.backend.edge_helper import node_has_well_defined_shape
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
+    is_not_qdq_node,
     NodeConverter,
 )
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options.resize_nearest_neighbor_options import (
     ResizeNearestNeighbor,
 )
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
+from torch.fx.passes.infra.partitioner import Partition
 from torch.nn import Parameter
 
+HeightScale = float
+WidthScale = float
+
 
 # noinspection SpellCheckingInspection
 class UpsampleNearest2DConverter(NodeConverter):
 
+    @classmethod
+    def supports_partitioning_result(
+        cls,
+        node: Node,
+        partition_list: list[Partition],
+        custom_delegation_options: CustomDelegationOptions,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+    ) -> bool:
+        h_scale, w_scale = cls._get_effective_scales(node)
+        is_alone_in_partition = cls.is_node_alone_in_partition(
+            node, partition_list, filter_fn=is_not_qdq_node
+        )
+
+        if is_alone_in_partition and h_scale == w_scale == 1:
+            # The operator is a no-op, so the Neutron Converter will skip it. If it's the only node in the
+            #  partition, the graph would end up empty.
+            return False
+
+        return True
+
     @staticmethod
     def _is_supported_in_IR(
         node: Node,
@@ -36,6 +63,14 @@ def _is_supported_in_IR(
                 " format. Please report this."
             )
 
+        # The conversion requires the output shape to be known and static.
+        if not node_has_well_defined_shape(node):
+            return False
+
+        if len(node.meta["val"].shape) != 4:
+            # Unexpected case. The input should always be 4D.
+            return False
+
         return True
 
     @staticmethod
@@ -45,39 +80,62 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        # Neutron requires static shapes.
-        #  neutron-converter/src/OperatorC/UpsamplePlugin.cpp?at=NEUTRON_SOFTWARE_2.2.3#74
-        if not node_has_well_defined_shape(node):
-            return False
-
-        if len(node.meta["val"].shape) != 4:
-            # Unexpected case. The input should always be 4D.
-            return False
-
-        # The tensors here use the channels first format (NCHW).
+        # The tensors are always 4D and use the channels first format (NCHW).
         _, in_c, in_h, in_w = node.all_input_nodes[0].meta["val"].shape
         _, _, out_h, out_w = node.meta["val"].shape
 
-        # Neutron supports only the doubling and quadrupleing of both height and width at the same time.
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#768
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#778
-        supported_scales = [2, 4]
-        if not any(
-            in_h * scale == out_h and in_w * scale == out_w
-            for scale in supported_scales
-        ):
-            return False
-
-        # Neutron requires the input channels to be a multiple of `num_macs`.
-        #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#767
-        if in_c % neutron_target_spec.get_num_macs() != 0:
-            return False
+        if custom_delegation_options.use_new_flow_neutron_c:
+            # Requirements specified by the new Neutron flow documentation.
+
+            if not NodeConverter.uses_quantization_type_for_io(
+                node,
+                supported_types=[torch.int8, torch.uint8],
+                input_indices=[0],
+                output_indices=[0],
+            ):
+                return False
+
+            supported_scales = [1, 2, 4, 8]
+            h_scale, w_scale = UpsampleNearest2DConverter._get_effective_scales(node)
+            # The H and W scales don't need to be equal but both must be supported.
+            if (h_scale not in supported_scales) or (w_scale not in supported_scales):
+                return False
+
+        else:
+            # Requirements of the old Neutron flow.
+
+            # Neutron supports only the doubling and quadrupling of both height and width at the same time.
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#768
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#778
+            supported_scales = [2, 4]
+            if not any(
+                in_h * scale == out_h and in_w * scale == out_w
+                for scale in supported_scales
+            ):
+                return False
+
+            # Neutron requires the input channels to be a multiple of `num_macs`.
+            #  neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.3#767
+            if in_c % neutron_target_spec.get_num_macs() != 0:
+                return False
 
         return True
 
+    @staticmethod
+    def _get_effective_scales(node: Node) -> tuple[HeightScale, WidthScale]:
+        # Neutron supports variants where `align_corners=False` and `align_corners=True`. ExecuTorch doesn't have this
+        #  parameter. Its behavior is equivalent to `align_corners=False`. Hence, the scale calculation corresponds to
+        #  the `align_corners=False` case in the Neutron documentation.
+        _, _, in_h, in_w = node.all_input_nodes[0].meta["val"].shape
+        _, _, out_h, out_w = node.meta["val"].shape
+        h_scale = out_h / in_h
+        w_scale = out_w / in_w
+
+        return h_scale, w_scale
+
     def convert(self, node: Node):
         """Convert the `aten.upsample_nearest2d.vec` operator to Neutron IR `ResizeNearestNeighbor`.
-        The schema is:
+        The ExecuTorch schema is:
             aten::upsample_nearest2d.vec(
                 Tensor input,
                 SymInt[]? output_size,
@@ -90,6 +148,8 @@ def convert(self, node: Node):
         x = t_op.tmp_inputs[0]
         y = t_op.tmp_outputs[0]
 
+        # Neutron supports variants where `align_corners=False` and `align_corners=True`. ExecuTorch doesn't have this
+        #  parameter. Its behavior is equivalent to `align_corners=False` and `half_pixel_centers=False`.
         t_op.builtin_options = ResizeNearestNeighbor(False, False)
 
         # The `aten.upsample_nearest2d` can use either the `size` attribute or the `scale_factor` to define the output
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py
@@ -4,20 +4,30 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+
+# noinspection PyUnusedImports
 import pytest
 import torch
 
 from executorch.backends.nxp.backend.edge_program_converter import (
     EdgeProgramToIRConverter,
 )
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
     graph_contains_any_of_ops,
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
-from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    AddTensor,
+    ExecutorchDelegateCall,
+    UpsampleNearest2D,
+)
+from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
 @pytest.fixture(autouse=True)
@@ -26,11 +36,6 @@ def reseed_model_per_test_run():
     np.random.seed(23)
 
 
-# noinspection PyProtectedMember
-ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate
-UpsampleNearest2D = exir_ops.edge.aten.upsample_nearest2d.vec
-
-
 class UpsampleNearestModule(torch.nn.Module):
 
     def __init__(self, size=None, scale=None):
@@ -41,6 +46,13 @@ def forward(self, x):
         return self.upsample(x)
 
 
+class UpsampleNearestAddModule(UpsampleNearestModule):
+
+    def forward(self, x):
+        x = super().forward(x)
+        return x + x
+
+
 @pytest.mark.parametrize(
     "input_shape, size",
     [
@@ -181,3 +193,120 @@ def test_convert_upsample_nearest2d__no_delegation__unsupported_size(input_shape
     # Make sure the `upsample` was NOT delegated (size != double of input).
     assert not graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
     assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleNearest2D])
+
+
+class TestUpsampleNearest2DNewNeutronFlow:
+
+    # noinspection PyMethodMayBeStatic
+    def assert_delegated(
+        self,
+        model,
+        input_shape,
+        mocker,
+        use_qat=False,
+        expected_delegated_ops=None,
+    ):
+        if expected_delegated_ops is None:
+            expected_delegated_ops = {UpsampleNearest2D: 1}
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops=expected_delegated_ops,
+            expected_non_delegated_ops={},
+        )
+
+        # Also cover negative values to thoroughly test the operator.
+        dataset_creator = RandomDatasetCreator(low=-2, high=2)
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+            dataset_creator,
+            use_qat=use_qat,
+            use_new_flow_neutron_c=True,  # Use the new flow.
+        )
+
+    # noinspection PyMethodMayBeStatic
+    def assert_not_delegated(self, model, input_shape):
+        delegated_ep = to_quantized_edge_program(
+            model, input_shape, use_new_flow_neutron_c=True
+        ).exported_program()
+
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
+        assert graph_contains_any_of_ops(delegated_ep.graph, [UpsampleNearest2D])
+
+    def test__qat(self, mocker, use_qat):
+        input_shape = (1, 2, 3, 4)
+        output_size = (6, 8)
+        model = UpsampleNearestModule(size=output_size)
+        self.assert_delegated(model, input_shape, mocker, use_qat=use_qat)
+
+    @pytest.mark.parametrize(
+        "input_shape, output_size",
+        [
+            pytest.param((1, 2, 3, 4), (6, 8), id="batch=1, scale_h=scale_w=2"),
+            pytest.param((1, 2, 3, 3), 6, id="batch=1, scale_h=scale_w=2, scalar size"),
+            pytest.param(
+                (3, 3, 3, 5),
+                (6, 5),
+                id="batch=3, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),
+            pytest.param((2, 2, 3, 4), (3, 16), id="batch=2, scale_h=1, scale_w=4"),
+            pytest.param((2, 2, 3, 4), (24, 8), id="batch=2, scale_h=8, scale_w=2"),
+        ],
+    )
+    def test__output_size(self, mocker, input_shape, output_size):
+        model = UpsampleNearestModule(size=output_size)
+        self.assert_delegated(model, input_shape, mocker)
+
+    def test__output_size__unsupported(self):
+        input_shape = (1, 2, 3, 4)
+        output_size = (9, 12)  # scale = (3, 3)
+        model = UpsampleNearestModule(size=output_size)
+        self.assert_not_delegated(model, input_shape)
+
+    @pytest.mark.parametrize(
+        "input_shape, scale",
+        [
+            pytest.param((1, 2, 3, 4), (2, 2), id="batch=1, scale_h=scale_w=2"),
+            pytest.param(
+                (1, 2, 3, 4), 4, id="batch=1, scale_h=scale_w=4, scalar scale"
+            ),
+            pytest.param(
+                (3, 3, 3, 5),
+                (2, 1),
+                id="batch=3, scale_h=2, scale_w=1 (no num_macs multiples)",
+            ),
+            pytest.param((2, 2, 3, 4), (4, 1), id="batch=2, scale_h=4, scale_w=1"),
+            pytest.param((2, 2, 3, 4), (2, 8), id="batch=2, scale_h=2, scale_w=8"),
+        ],
+    )
+    def test__scales(self, mocker, input_shape, scale):
+        model = UpsampleNearestModule(scale=scale)
+        self.assert_delegated(model, input_shape, mocker)
+
+    def test__scales__unsupported(self):
+        input_shape = (1, 2, 3, 4)
+        scale = (3, 3)
+        model = UpsampleNearestModule(scale=scale)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__noop__alone_in_partition__not_delegated(self):
+        input_shape = (1, 2, 3, 4)
+        scale = 1
+        model = UpsampleNearestModule(scale=scale)
+        self.assert_not_delegated(model, input_shape)
+
+    def test__noop__not_alone_in_partition__delegated(self, mocker):
+        input_shape = (1, 2, 3, 4)
+        scale = 1
+        model = UpsampleNearestAddModule(scale=scale)
+        self.assert_delegated(
+            model,
+            input_shape,
+            mocker,
+            expected_delegated_ops={UpsampleNearest2D: 1, AddTensor: 1},
+        )