NXP backend: Extended support of HardTanh with new Neutron C flow (pytorch#20177)

StrycekSimon · MartinPavella · web-flow · commit ca95da101601 · 2026-06-22T08:20:03.000+02:00
### Summary - Adjusted the Clamp-related implementation so that it can be reused for HardTanh, as both operators share the same logic. - Added new Neutron C flow support for the HardTanh operator. ### Test plan Covered by newly added tests. cc @robert-kalmar @JakeStevens @digantdesai @rascani --------- Co-authored-by: Martin Pavella <martin.pavella@nxp.com>
diff --git a/backends/nxp/backend/graph_utils.py b/backends/nxp/backend/graph_utils.py
@@ -56,7 +56,7 @@ def get_output_shape(node: Node) -> tuple[torch.Size] | torch.Size | None:
 
 
 def is_clamp_preserved_under_quantization(
-    node: Node, min_val: int = 0, max_val: int | None = None
+    node: Node, min_val: float = 0, max_val: float | None = None
 ) -> bool:
     """
     Checks if Clamp/ReLU/HardTanh is preserved under quantization and did
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
@@ -42,17 +42,6 @@
 from torch.nn import Parameter
 
 
-def _is_convertible_to_relu(node):
-    bounds = ClampConverter._get_clamp_bounds(node)
-    bounds = tuple(v if v is not None and math.isfinite(v) else None for v in bounds)
-
-    # Some specific bounds can be replaced with single op ReLU.
-    if bounds not in ClampConverter.RELU_COMPATIBLE_BOUNDS.values():
-        return False
-
-    return True
-
-
 class ClampConverter(NodeConverter):
     RELU_COMPATIBLE_BOUNDS = {
         "ReluN1To1": (-1, 1),
@@ -70,12 +59,25 @@ class ClampConverter(NodeConverter):
 
     # noinspection PyShadowingBuiltins
     @staticmethod
-    def _get_clamp_bounds(clamp_node: Node) -> tuple[float | None, float | None]:
+    def _get_bounds(node: Node) -> tuple[float | None, float | None]:
         """Extract min and max bounds from `aten.clamp.default` node."""
-        min = try_get_arg(clamp_node, 1)
-        max = try_get_arg(clamp_node, 2)
+        min = try_get_arg(node, 1)
+        max = try_get_arg(node, 2)
         return min, max
 
+    @classmethod
+    def _is_convertible_to_relu(cls, node):
+        bounds = cls._get_bounds(node)
+        bounds = tuple(
+            v if v is not None and math.isfinite(v) else None for v in bounds
+        )
+
+        # Some specific bounds can be replaced with single op ReLU.
+        if bounds not in cls.RELU_COMPATIBLE_BOUNDS.values():
+            return False
+
+        return True
+
     @staticmethod
     def _is_supported_in_IR(
         node: Node,
@@ -100,20 +102,21 @@ def _io_quant_is_same(node: Node):
         dq_params = dequant.args[1:]
         return all(q == dq for q, dq in zip(q_params, dq_params))
 
-    @staticmethod
+    @classmethod
     def _is_supported_on_target(
+        cls,
         node: Node,
         neutron_target_spec: NeutronTargetSpec,
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        relu_compatible = _is_convertible_to_relu(node)
-        bounds = ClampConverter._get_clamp_bounds(node)
+        relu_compatible = cls._is_convertible_to_relu(node)
+        bounds = cls._get_bounds(node)
 
         if all(b is None or math.isinf(b) for b in bounds):
             return False
 
-        io_quant_consistent = ClampConverter._io_quant_is_same(node)
+        io_quant_consistent = cls._io_quant_is_same(node)
         quant_supported = NodeConverter.uses_quantization_type_for_io(
             node,
             supported_types=[torch.int8, torch.uint8],
@@ -138,19 +141,20 @@ def supports_partitioning_result(
         neutron_target_spec: NeutronTargetSpec,
         parameters_mapping: dict[str, Parameter],
     ) -> bool:
-        bounds = cls._get_clamp_bounds(node)
+        bounds = cls._get_bounds(node)
 
         # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
         # and at the same time the node does not satisfy delegation requirements.
-        # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfuly.
+        # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
         if bounds in cls.RELU_COMPATIBLE_BOUNDS.values():
             is_alone_in_partition = cls.is_node_alone_in_partition(
                 node, partition_list, filter_fn=is_not_qdq_node
             )
             if is_alone_in_partition:
+                # noinspection PyTypeChecker
                 return is_clamp_preserved_under_quantization(
                     node,
-                    min_val=bounds[0],
+                    min_val=bounds[0] if bounds[0] is not None else 0,
                     max_val=bounds[1],
                 )
 
@@ -167,9 +171,9 @@ def convert(self, node: Node):
             ) -> Tensor
         """
         self.assert_convertible(node)
-        to_relu = _is_convertible_to_relu(node)
+        to_relu = self._is_convertible_to_relu(node)
 
-        bounds = self._get_clamp_bounds(node)
+        bounds = self._get_bounds(node)
         bounds = tuple(
             v if v is not None and math.isfinite(v) else None for v in bounds
         )
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py
@@ -3,43 +3,16 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.backends.nxp.backend.ir.converter.node_converter import (
-    CustomDelegationOptions,
-    is_not_qdq_node,
-    NodeConverter,
-    Partition,
-)
-from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
-    BuiltinOperator,
-)
-from executorch.backends.nxp.backend.neutron_operator_support import (
-    activation_supported_on_target,
+
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
+    ClampConverter,
 )
-from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
-from torch.nn import Parameter
-
 
-class HardTanhConverter(NodeConverter):
-
-    # Maps possible input parameters of HardTanh to equivalent ReLU-based operators supported by TFLite.
-    SUPPORTED_MODES_MAP = {
-        (0.0, 6.0): BuiltinOperator.RELU6,
-        (-1.0, 1.0): BuiltinOperator.RELU_N1_TO_1,
-        (0.0, 1.0): BuiltinOperator.RELU_0_TO_1,
-        (0.0, float("inf")): BuiltinOperator.RELU,
-    }
-
-    # Maps possible modes of HardTanh to equivalent ReLU bounds.
-    SUPPORTED_BOUNDS_MAP = {
-        "ReluN1To1": (-1.0, 1.0),
-        "Relu0To1": (0.0, 1.0),
-        "Relu6": (0.0, 6.0),
-        "Relu": (0.0, float("inf")),
-    }
 
+class HardTanhConverter(ClampConverter):
     @staticmethod
-    def _get_hardtanh_bounds(node: Node) -> tuple[float, float]:
+    def _get_bounds(node: Node) -> tuple[float | None, float | None]:
         args = node.args
 
         match len(args):
@@ -62,51 +35,3 @@ def _get_hardtanh_bounds(node: Node) -> tuple[float, float]:
                 )
 
         return min_val, max_val
-
-    @staticmethod
-    def _is_supported_in_IR(
-        node: Node,
-        parameters_mapping: dict[str, Parameter],
-        custom_delegation_options: CustomDelegationOptions,
-    ) -> bool:
-        bounds = HardTanhConverter._get_hardtanh_bounds(node)
-        return bounds in HardTanhConverter.SUPPORTED_MODES_MAP
-
-    @classmethod
-    def supports_partitioning_result(
-        cls,
-        node: Node,
-        partition_list: list[Partition],
-        custom_delegation_options: CustomDelegationOptions,
-        neutron_target_spec: NeutronTargetSpec,
-        parameters_mapping: dict[str, Parameter],
-    ) -> bool:
-        bounds = HardTanhConverter._get_hardtanh_bounds(node)
-
-        # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
-        # and at the same time the node does not satisfy delegation requirements.
-        # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfuly.
-        if bounds in [
-            cls.SUPPORTED_BOUNDS_MAP["Relu"],
-            cls.SUPPORTED_BOUNDS_MAP["Relu6"],
-        ]:
-            is_alone_in_partition = cls.is_node_alone_in_partition(
-                node, partition_list, filter_fn=is_not_qdq_node
-            )
-            if is_alone_in_partition:
-                return activation_supported_on_target(node)
-
-        return True
-
-    def convert(self, node: Node):
-        """Convert 'aten::hardtanh' to its supported ReLU equivalent."""
-        self.assert_convertible(node)
-
-        t_op = self._create_tflite_op_with_io_tensors(node)
-
-        bounds = HardTanhConverter._get_hardtanh_bounds(node)
-
-        op = self.SUPPORTED_MODES_MAP[bounds]
-        t_op.opcode_index = self.builder.op_code_index_for_op_type(op)
-
-        self.builder.append_operators([t_op])
diff --git a/backends/nxp/backend/ir/converter/quantization_utils.py b/backends/nxp/backend/ir/converter/quantization_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2023-2025 NXP
+# Copyright 2023-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -135,11 +135,12 @@ def set_quantization_parameters_to_tensor(
 def quantize_int8(
     data: np.ndarray, scale: List[float], zero_point: List[int]
 ) -> np.ndarray:
+    # noinspection PyTypeChecker
     return quantize(data, zero_point=zero_point, scale=scale)
 
 
 def quantize(
-    value: np.ndarray | int,
+    value: np.ndarray | float,
     zero_point: List[int] | int,
     scale: List[float] | float,
     quant_min: int = -128,
diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py
@@ -11,7 +11,10 @@
 
 import torch
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
-    _is_convertible_to_relu,
+    ClampConverter,
+)
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.hardtanh_converter import (
+    HardTanhConverter,
 )
 from executorch.backends.nxp.quantizer.utils import (
     get_bias_qparams,
@@ -438,7 +441,7 @@ def get_anchors(
     ) -> PartitionAnchors | None:
         node = fused_partition[0].nodes[-1]
 
-        if not _is_convertible_to_relu(node):
+        if not ClampConverter._is_convertible_to_relu(node):
             return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition)
         else:
             return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition)
@@ -726,33 +729,28 @@ class HardTanhPattern(SingleInputBasicPattern):
     def partition_types(self):
         return [torch.ops.aten.hardtanh.default]
 
+    def get_anchors(
+        self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
+    ) -> PartitionAnchors | None:
+        node = fused_partition[0].nodes[-1]
+
+        if not HardTanhConverter._is_convertible_to_relu(node):
+            return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition)
+        else:
+            return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition)
+
     def replacement_op(self):
         raise AssertionError()
 
 
-class HardTanhInPlacePattern(SingleInputBasicPattern):
+class HardTanhInPlacePattern(HardTanhPattern):
     """
     Quantizer for HardTanh operator with param inplace=True.
     """
 
     def partition_types(self):
         return [torch.ops.aten.hardtanh_.default]
 
-    def get_anchors(
-        self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
-    ) -> PartitionAnchors | None:
-        node = fused_partition[0].nodes[-1]
-
-        return PartitionAnchors(
-            inputs=[(node, NodeArgsIdx(0))],
-            weights=[],
-            biases=[],
-            output=[(node,)],
-        )
-
-    def replacement_op(self):
-        raise AssertionError()
-
 
 class LeakyReluPattern(SingleInputBasicPattern):
     """Quantizer for the `aten.leaky_relu.default` operator."""
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py
@@ -24,9 +24,6 @@
 )
 from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops
 from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
-from executorch.backends.nxp.tests.model_output_comparator import (
-    NumericalStatsOutputComparator,
-)
 from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
 from executorch.backends.nxp.tests.ops_aliases import (
     AddTensor,
@@ -68,6 +65,35 @@ def forward(self, x):
 
 
 class TestClamp:
+
+    @pytest.mark.parametrize(
+        "min, max",
+        [
+            pytest.param(-1, 2, id="min = -1, max = 2 (Max/Min)"),
+            pytest.param(0.0, None, id="min = 0, max = None (Relu)"),
+        ],
+    )
+    def test__qat(self, mocker, request, min, max, use_qat):
+        input_shape = (2, 7, 2)  # Indivisible by num_macs
+        model = AddClampModule(min, max)
+
+        x_input_spec = ModelInputSpec(input_shape)
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={
+                AddTensor: 1,
+                Clamp: 1,
+            },
+            expected_non_delegated_ops={},
+        )
+
+        lower_run_compare(
+            model=model,
+            input_spec=[x_input_spec],
+            request=request,
+            dlg_model_verifier=graph_verifier,
+        )
+
     @pytest.mark.parametrize(
         "min, max",
         [
@@ -90,12 +116,11 @@ class TestClamp:
             pytest.param(0.0, None, id="min = 0, max = None (Relu)"),
         ],
     )
-    def test_convert_clamp__full_pipeline(self, mocker, request, min, max, use_qat):
+    def test_convert_clamp__full_pipeline(self, mocker, request, min, max):
         input_shape = (2, 7, 2)  # Indivisible by num_macs
         model = AddClampModule(min, max)
 
         x_input_spec = ModelInputSpec(input_shape)
-        comparator = NumericalStatsOutputComparator()
         graph_verifier = DetailedGraphVerifier(
             mocker,
             expected_delegated_ops={
@@ -110,8 +135,6 @@ def test_convert_clamp__full_pipeline(self, mocker, request, min, max, use_qat):
             input_spec=[x_input_spec],
             dlg_model_verifier=graph_verifier,
             request=request,
-            output_comparator=comparator,
-            use_qat=use_qat,
         )
 
     @pytest.mark.parametrize(
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py