diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py index fd28b077b8a..673af19310f 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/add_tensor_converter.py @@ -3,6 +3,9 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import torch + +from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NodeConverter, @@ -23,11 +26,33 @@ def _is_supported_on_target( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - if NodeConverter.uses_shape_broadcasting(node): - # Shape broadcasting may require the addition of `Transpose` ops during conversion. - return False + if custom_delegation_options.use_new_flow_neutron_c: + if not NodeConverter.at_least_one_input_shape_matches_the_output_shape( + node + ): + return False - return True + # If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes + # Transpose is currently not supported for new flow + if any( + input_node.meta[NXP_NODE_FORMAT].is_channels_first() + for input_node in node.all_input_nodes + ) and NodeConverter._node_inputs_ranks_not_equal(node): + return False + + supported_types = [torch.int8, torch.uint8] + if not NodeConverter.uses_quantization_type_for_io( + node, supported_types, [0, 1], [0] + ): + return False + + return True + else: + if NodeConverter.uses_shape_broadcasting(node): + # Shape broadcasting may require the addition of `Transpose` ops during conversion. + return False + + return True @staticmethod def _is_supported_in_IR( @@ -43,12 +68,13 @@ def _is_supported_in_IR( return True - # add.Tensor Node format: (Tensor self, Tensor other, *, Scalar alpha=1) def convert(self, node: Node): - """Convert 'add_tensor' operator to TFLite 'add'.""" + """Convert 'add_tensor' operator to NeutronIR 'Add'. + The ExecuTorch schema is: + add.Tensor(Tensor self, Tensor other, Scalar alpha=1) + """ self.assert_convertible(node) - t_op = self._create_tflite_op_with_io_tensors(node) - t_op.builtin_options = add_options.Add() + self.builder.append_operators([t_op]) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py index 1aa58ab5d95..4448146a63e 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py @@ -1,7 +1,8 @@ -# Copyright 2025 NXP +# Copyright 2025-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. + import numpy as np import pytest import torch @@ -9,17 +10,31 @@ from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.tests.executorch_pipeline import ( + ModelInputSpec, + to_quantized_edge_program, +) from executorch.backends.nxp.tests.executors import ( convert_run_compare, + graph_contains_any_of_ops, ToChannelFirstPreprocess, ToChannelLastPreprocess, ) +from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier +from executorch.backends.nxp.tests.model_output_comparator import ( + NumericalStatsOutputComparator, +) from executorch.backends.nxp.tests.models import ( AddTensorConvModule, AddTensorModule, AddTensorOneInputModule, ) +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare +from executorch.backends.nxp.tests.ops_aliases import ( + AddTensor, + Convolution, + ExecutorchDelegateCall, +) from torch.export import ExportedProgram from executorch.backends.nxp.tests.use_qat import * # noqa F403 @@ -64,7 +79,6 @@ def test_add_tensor_quant_conversion(mocker, input_shape, use_qat): @pytest.mark.parametrize( "input_shape", [ - pytest.param((4,), id="1D."), pytest.param((6, 6), id="2D."), pytest.param((1, 4, 8), id="3D."), pytest.param((1, 4, 8, 8), id="4D."), @@ -92,20 +106,26 @@ def test_add_tensor_one_input_quant_conversion(mocker, input_shape, use_qat): @pytest.mark.parametrize( - "input_shape", + "x_input_shape", [ pytest.param((1, 4, 8, 8), id="4D."), pytest.param((1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."), ], ) -def test_add_tensor_w_conv_quant_conversion(mocker, input_shape, use_qat): +def test_add_tensor_w_conv_quant_conversion(mocker, x_input_shape, use_qat): model = AddTensorConvModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + n, c, h, w = x_input_shape + y_input_shape = (n, 8, h, w) + # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False + model, + [x_input_shape, y_input_shape], + use_qat=use_qat, + use_neutron_for_format_conversion=False, ) # Capture generated model @@ -114,7 +134,13 @@ def test_add_tensor_w_conv_quant_conversion(mocker, input_shape, use_qat): # Capture converted program exported_program: ExportedProgram = converter_spy.call_args.args[1] - input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8) + input_data_1 = (np.random.random(x_input_shape).astype(np.float32) * 50).astype( + np.int8 + ) + input_data_2 = (np.random.random(y_input_shape).astype(np.float32) * 50).astype( + np.int8 + ) + input_data = {0: input_data_1, 1: input_data_2} convert_run_compare( exported_program, @@ -149,7 +175,7 @@ def test_add_tensor_broadcasting_unsupported_quant_conversion( nodes = list(edge_program.graph.nodes) # Broadcast is not supported, node is not converted - assert nodes[6].target.__name__ == "aten.add.Tensor" # Add Tensor is not delegated. + assert nodes[6].target == AddTensor # Add Tensor is not delegated. # Capture converted program # exported_program: ExportedProgram = converter_spy.call_args.args[1] @@ -159,3 +185,164 @@ def test_add_tensor_broadcasting_unsupported_quant_conversion( # input_data = {0: x_input_data, 1: y_input_data} # # convert_run_compare(exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data) + + +class TestAddTensorNewNeutronFlow: + @pytest.mark.skip("AIR-14602: incorrect results") + @pytest.mark.parametrize( + "x_input_shape", + [ + pytest.param((1,), id="1D."), + pytest.param((6, 8), id="2D."), + pytest.param((1, 4, 8), id="3D."), + pytest.param((1, 4, 8, 8), id="4D."), + ], + ) + def test__basic_nsys_inference(self, x_input_shape, mocker): + x_input_spec = ModelInputSpec(x_input_shape) + model = AddTensorModule() + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={} + ) + + lower_run_compare( + model, + [x_input_spec, x_input_spec], + graph_verifier, + use_new_flow_neutron_c=True, + ) + + @pytest.mark.skip("AIR-14602: incorrect results") + @pytest.mark.parametrize( + "x_input_shape", + [ + pytest.param((6, 8), id="2D."), + pytest.param((1, 4, 8), id="3D."), + pytest.param((1, 4, 8, 8), id="4D."), + ], + ) + def test__basic_nsys_inference_qat(self, x_input_shape, mocker): + x_input_spec = ModelInputSpec(x_input_shape) + model = AddTensorModule() + comparator = NumericalStatsOutputComparator() + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={} + ) + + lower_run_compare( + model, + [x_input_spec, x_input_spec], + graph_verifier, + output_comparator=comparator, + use_new_flow_neutron_c=True, + use_qat=True, + ) + + @pytest.mark.skip("AIR-14602: incorrect results") + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D." + ), + pytest.param( + [ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))], + id="2 inputs 3D.", + ), + pytest.param( + [ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 2D+3D." + ), + ], + ) + def test__correct_broadcast(self, input_spec, mocker): + model = AddTensorModule() + graph_verifier = DetailedGraphVerifier( + mocker, expected_delegated_ops={AddTensor: 1}, expected_non_delegated_ops={} + ) + + lower_run_compare( + model, input_spec, graph_verifier, use_new_flow_neutron_c=True + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((4, 1)), ModelInputSpec((1, 6))], id="2 inputs 2D." + ), + pytest.param( + [ModelInputSpec((1, 3, 4)), ModelInputSpec((5, 3, 1))], + id="2 inputs 3D.", + ), + pytest.param( + [ModelInputSpec((6, 4)), ModelInputSpec((6, 6, 1))], + id="2 inputs 2D+3D.", + ), + ], + ) + def test__incorrect_broadcast(self, input_spec): + # Broadcast where at least one of the inputs is not equal to output is not supported + model = AddTensorModule() + + delegated_ep = to_quantized_edge_program( + model, input_spec, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `add.Tensor` was NOT delegated. + assert not graph_contains_any_of_ops( + delegated_ep.graph, [ExecutorchDelegateCall] + ) + assert graph_contains_any_of_ops(delegated_ep.graph, [AddTensor]) + + @pytest.mark.skip("AIR-14602: incorrect results") + @pytest.mark.parametrize( + "x_input_shape", + [ + pytest.param( + (1, 4, 5, 5), id="4D, product of dims is not a multiple of 8." + ), + ], + ) + def test__w_conv(self, x_input_shape, mocker): + model = AddTensorConvModule() + + n, c, h, w = x_input_shape + y_input_spec = ModelInputSpec((n, 8, h, w)) + x_input_spec = ModelInputSpec(x_input_shape) + + graph_verifier = DetailedGraphVerifier( + mocker, + expected_delegated_ops={AddTensor: 1, Convolution: 1}, + expected_non_delegated_ops={}, + ) + + lower_run_compare( + model, + [x_input_spec, y_input_spec], + graph_verifier, + use_new_flow_neutron_c=True, + ) + + @pytest.mark.parametrize( + "input_spec", + [ + pytest.param( + [ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))], + id="2 inputs 4D + 2D.", + ), + pytest.param( + [ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))], + id="2 inputs last + 3D.", + ), + ], + ) + def test__w_conv_unsupported(self, input_spec): + model = AddTensorConvModule() + + delegated_ep = to_quantized_edge_program( + model, input_spec, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `add.Tensor` was NOT delegated. + assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) + assert graph_contains_any_of_ops(delegated_ep.graph, [AddTensor]) diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py index 045dcfaba40..1292c4cf17d 100644 --- a/backends/nxp/tests/models.py +++ b/backends/nxp/tests/models.py @@ -656,9 +656,9 @@ def __init__(self): super().__init__() self.conv = Conv2dModule(padding=1, stride=1) - def forward(self, x): + def forward(self, x, y): x = self.conv(x) - return x + x + return x + y class AddTensorOneInputModule(torch.nn.Module): diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py index f190ca91e1f..1b42a91fb37 100644 --- a/backends/nxp/tests/ops_aliases.py +++ b/backends/nxp/tests/ops_aliases.py @@ -12,6 +12,7 @@ from executorch.exir.dialects._ops import ops as exir_ops Abs = exir_ops.edge.aten.abs.default +AddTensor = exir_ops.edge.aten.add.Tensor AvgPool2D = exir_ops.edge.aten.avg_pool2d.default Bmm = exir_ops.edge.aten.bmm.default Convolution = exir_ops.edge.aten.convolution.default