NXP backend: Enable new Neutron C flow support for Clamp operator

StrycekSimon · StrycekSimon · commit a63628225221 · 2026-05-13T07:45:33.000+02:00
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
@@ -3,15 +3,24 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+from copy import copy
+
+import numpy as np
 from executorch.backends.nxp.backend.edge_helper import try_get_arg
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
+    _is_dequant_node,
     CustomDelegationOptions,
     is_not_qdq_node,
     NodeConverter,
 )
 from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
     BuiltinOperator,
 )
+from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
+from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
+    maximum_options,
+    minimum_options,
+)
 from executorch.backends.nxp.backend.neutron_operator_support import (
     activation_supported_on_target,
 )
@@ -21,6 +30,16 @@
 from torch.nn import Parameter
 
 
+def _is_convertible_to_relu(node):
+    """Return whether `node` has clamp bounds listed in `SUPPORTED_BOUNDS`.
+
+    Only some specific bounds are supported on the target hardware.
+    """
+    clamp_bounds = ClampConverter._get_clamp_bounds(node)
+    supported_bounds = ClampConverter.SUPPORTED_BOUNDS.values()
+    return clamp_bounds in supported_bounds
+
+
 class ClampConverter(NodeConverter):
     SUPPORTED_BOUNDS = {
         "ReluN1To1": (-1, 1),
@@ -48,7 +67,7 @@ def _get_clamp_bounds(clamp_node: Node) -> tuple[float | None, float | None]:
     def _is_supported_in_IR(
         node: Node,
         parameters_mapping: dict[str, Parameter],
-        custom_delegation_options: CustomDelegationOptions,
+        _: CustomDelegationOptions,
     ) -> bool:
         # No NeutronIR-specific restrictions.
         return True
@@ -58,22 +77,19 @@ def _is_supported_on_target(
         node: Node,
         neutron_target_spec: NeutronTargetSpec,
         parameters_mapping: dict[str, Parameter],
-        custom_delegation_options: CustomDelegationOptions,
+        _: CustomDelegationOptions,
     ) -> bool:
-        bounds = ClampConverter._get_clamp_bounds(node)
-
-        # Only some specific bounds are supported on the target hardware.
-        if bounds not in ClampConverter.SUPPORTED_BOUNDS.values():
-            return False
+        if neutron_target_spec.use_new_flow_neutron_c:
+            return True
 
-        return True
+        return _is_convertible_to_relu(node)
 
     @classmethod
     def supports_partitioning_result(
         cls,
         node: Node,
         partition_list: list[Partition],
-        custom_delegation_options: CustomDelegationOptions,
+        _: CustomDelegationOptions,
         neutron_target_spec: NeutronTargetSpec,
         parameters_mapping: dict[str, Parameter],
     ) -> bool:
@@ -91,6 +107,15 @@ def supports_partitioning_result(
 
         return True
 
+    @staticmethod
+    def propagate_quantization(from_node, to_node):
+        to_node.quantization = copy(from_node.quantization)  # shallow copy, not a shared reference
+
+    @staticmethod
+    def _quantize_value(value, zp, scale, quant_min, quant_max):
+        """Return `round(value / scale) + zp` clipped to [quant_min, quant_max]."""
+        return np.clip(round(value / scale) + zp, quant_min, quant_max)
+
     def convert(self, node: Node):
         """Convert the `aten.clamp.default` operator to Neutron IR `Relu*` operators.
         The schema is:
@@ -101,13 +126,71 @@ def convert(self, node: Node):
             ) -> Tensor
         """
         self.assert_convertible(node)
+        to_relu = _is_convertible_to_relu(node)
 
         bounds = self._get_clamp_bounds(node)
-
         t_op = self._create_tflite_op_with_io_tensors(node)
 
-        # noinspection PyTypeChecker,PyUnboundLocalVariable
-        t_op.opcode_index = self.builder.op_code_index_for_op_type(
-            self.BOUNDS_TO_NEUTRON_IR_OP[bounds]
+        if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu:
+            # noinspection PyTypeChecker,PyUnboundLocalVariable
+            t_op.opcode_index = self.builder.op_code_index_for_op_type(
+                self.BOUNDS_TO_NEUTRON_IR_OP[bounds]
+            )
+            self.builder.append_operators([t_op])
+            return
+
+        # No matching `Relu*` operator: emit `Maximum` + `Minimum` on the quantized data,
+        # using the quantization parameters of the dequantize node feeding the clamp.
+        q_node = node.args[0]
+        assert _is_dequant_node(q_node)
+        _, scale, zp, quant_min, quant_max, _ = q_node.args
+
+        x = t_op.tmp_inputs[0]
+        y = t_op.tmp_outputs[0]
+
+        if x.quantization is not None and y.quantization is None:
+            self.propagate_quantization(x, y)
+
+        if x.quantization != y.quantization:
+            raise AssertionError(
+                "Input and output quantization should be same in order to convert to max/min."
+            )
+
+        max_y = self.builder.duplicate_tensor(x)
+
+        min_value, max_value = bounds
+        # A bound may be `None` (one-sided clamp). Assuming the quantized data already lies in
+        # [quant_min, quant_max], the matching range edge makes that side a no-op.
+        min_value = (
+            quant_min
+            if min_value is None
+            else self._quantize_value(min_value, zp, scale, quant_min, quant_max)
+        )
+        max_value = (
+            quant_max
+            if max_value is None
+            else self._quantize_value(max_value, zp, scale, quant_min, quant_max)
+        )
+
+        min_tensor = self.builder.create_tensor_for_data(
+            np.array([min_value], np.int8), "min"
+        )
+        self.propagate_quantization(x, min_tensor)
+        max_tensor = self.builder.create_tensor_for_data(
+            np.array([max_value], np.int8), "max"
         )
-        self.builder.append_operators([t_op])
+        self.propagate_quantization(x, max_tensor)
+
+        # clamp(x, lo, hi) == minimum(maximum(x, lo), hi): the lower bound (`min_tensor`)
+        # feeds `Maximum` and the upper bound (`max_tensor`) feeds `Minimum`.
+        max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum())
+        max_op.tmp_inputs = [x, min_tensor]
+        max_op.tmp_outputs = [max_y]
+
+        min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum())
+        min_op.tmp_inputs = [max_y, max_tensor]
+        min_op.tmp_outputs = [y]
+
+        self.propagate_quantization(x, max_y)
+
+        self.builder.append_operators([max_op, min_op])
diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py
@@ -10,6 +10,9 @@
 from functools import partial
 
 import torch
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
+    _is_convertible_to_relu,
+)
 from executorch.backends.nxp.quantizer.utils import (
     get_bias_qparams,
     get_bias_qparams_transp_conv,
@@ -408,12 +411,48 @@ def get_anchors(
         )
 
 
-class ClampPattern(SingleInputBasicPattern):
+class ClampPattern(QuantizationPattern):
     """Quantizer for the `aten.clamp.default` operator."""
 
     def partition_types(self):
         return [torch.ops.aten.clamp.default]
 
+    def get_anchors(
+        self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
+    ) -> PartitionAnchors | None:
+        """Build the quantization anchors for the matched clamp node.
+
+        Returns None if the q-params of the input producer cannot be shared.
+        """
+        partition = fused_partition[0]
+        node = partition.nodes[-1]
+        target_spec = self.neutron_quantizer.neutron_target_spec
+
+        if not target_spec.use_new_flow_neutron_c or _is_convertible_to_relu(node):
+            # Single input pattern
+            return PartitionAnchors(
+                inputs=[(node, NodeArgsIdx(0))],
+                weights=[],
+                biases=[],
+                output=[(node,)],
+            )
+
+        # Shared spec pattern
+        assert len(partition.input_nodes) == 1
+        prev_node = partition.input_nodes[0]
+
+        # Previous node was not quantized => we are not able to share q-params
+        if Q_ANNOTATION_KEY not in prev_node.meta:
+            return None
+
+        shared_spec = SharedQuantizationSpec(prev_node)
+        return PartitionAnchors(
+            inputs=[(node, NodeArgsIdx(0))],
+            weights=[],
+            biases=[],
+            output=[(node, shared_spec)],
+        )
+
 
 def _is_batch_norm(node_: Node) -> bool:
     return node_.op == "call_function" and node_.target in [