Skip to content

Commit 0a4a466

Browse files
committed
NXP backend: Enable new Neutron C flow support for Clamp operator
1 parent aa85afe commit 0a4a466

2 files changed

Lines changed: 185 additions & 21 deletions

File tree

backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py

Lines changed: 151 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,32 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
import math
7+
8+
import numpy as np
9+
import torch
610
from executorch.backends.nxp.backend.edge_helper import try_get_arg
11+
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
12+
torch_type_to_numpy_type,
13+
)
714
from executorch.backends.nxp.backend.ir.converter.node_converter import (
15+
_is_dequant_node,
16+
_is_quant_node,
817
CustomDelegationOptions,
918
is_not_qdq_node,
1019
NodeConverter,
1120
)
21+
from executorch.backends.nxp.backend.ir.converter.quantization_utils import (
22+
propagate_quantization,
23+
)
1224
from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
1325
BuiltinOperator,
1426
)
27+
from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
28+
from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
29+
maximum_options,
30+
minimum_options,
31+
)
1532
from executorch.backends.nxp.backend.neutron_operator_support import (
1633
activation_supported_on_target,
1734
)
@@ -21,15 +38,25 @@
2138
from torch.nn import Parameter
2239

2340

41+
def _is_convertible_to_relu(node):
    """Tell whether the clamp `node` can be expressed as one of the `Relu*` operators.

    The bounds of the node are obtained via `ClampConverter._get_clamp_bounds` and
    compared against the pairs listed in `ClampConverter.RELU_COMPATIBLE_BOUNDS`.

    :param node: The clamp node to inspect.
    :return: True if the node's bounds equal one of the ReLU-compatible bound pairs.
    """
    clamp_bounds = ClampConverter._get_clamp_bounds(node)

    # Only a handful of specific (min, max) pairs map onto a `Relu*` operator.
    return clamp_bounds in ClampConverter.RELU_COMPATIBLE_BOUNDS.values()
49+
50+
2451
class ClampConverter(NodeConverter):
25-
SUPPORTED_BOUNDS = {
52+
RELU_COMPATIBLE_BOUNDS = {
2653
"ReluN1To1": (-1, 1),
2754
"Relu0To1": (0, 1),
2855
"Relu6": (0, 6),
2956
"Relu": (0, None),
3057
}
3158

32-
BOUNDS_TO_NEUTRON_IR_OP = {
59+
BOUNDS_TO_RELU_NEUTRON_IR_OP = {
3360
(-1, 1): BuiltinOperator.RELU_N1_TO_1,
3461
(0, 1): BuiltinOperator.RELU_0_TO_1,
3562
(0, 6): BuiltinOperator.RELU6,
@@ -53,27 +80,52 @@ def _is_supported_in_IR(
5380
# No NeutronIR-specific restrictions.
5481
return True
5582

83+
@staticmethod
def _io_quant_is_same(node: Node):
    """Check that the input and output of `node` are quantized with the same parameters.

    The first argument of `node` must be produced by a dequantize node and the first
    user of `node` must be a quantize node. Their arguments after the first one (the
    quantization parameters) are compared element by element.

    :param node: The node whose surrounding dequantize/quantize pair is inspected.
    :return: True if the pair exists and all their parameters are equal. False
             otherwise, including when `node` has no arguments or no users.
    """
    # A node without inputs or without users cannot be wrapped in a dequantize/quantize pair.
    if not node.args:
        return False

    quant = next(iter(node.users), None)
    if quant is None:
        return False

    dequant = node.args[0]
    if not _is_dequant_node(dequant) or not _is_quant_node(quant):
        return False

    q_params = quant.args[1:]
    dq_params = dequant.args[1:]

    # `zip` stops at the shorter sequence, so a length mismatch has to be rejected explicitly.
    if len(q_params) != len(dq_params):
        return False

    return all(q == dq for q, dq in zip(q_params, dq_params))
97+
5698
@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether the clamp `node` can be delegated to the target.

    Without the new Neutron C flow, only clamps with ReLU-compatible bounds are
    accepted. With the new flow, the node IO must use int8 or uint8 quantization and
    the node must either be ReLU-compatible or have identical input and output
    quantization.

    :param node: The clamp node to check.
    :param neutron_target_spec: Target specification; its `use_new_flow_neutron_c`
                                flag selects which rules apply.
    :param parameters_mapping: Not used by this check.
    :param custom_delegation_options: Not used by this check.
    :return: True if the node is supported on the target.
    """
    relu_compatible = _is_convertible_to_relu(node)

    if not neutron_target_spec.use_new_flow_neutron_c:
        return relu_compatible

    quant_supported = NodeConverter.uses_quantization_type_for_io(
        node,
        supported_types=[torch.int8, torch.uint8],
        input_indices=[0],
        output_indices=[0],
    )
    if not quant_supported:
        return False

    # We either convert to ReLU -> SingleInputQuantization pattern
    # or we convert to Min/Max, which requires same quantization on
    # both input and output. The logical `or` skips the IO quantization
    # check when the node is already ReLU-compatible.
    return relu_compatible or ClampConverter._io_quant_is_same(node)
70122

71123
@classmethod
72124
def supports_partitioning_result(
73125
cls,
74126
node: Node,
75127
partition_list: list[Partition],
76-
custom_delegation_options: CustomDelegationOptions,
128+
_: CustomDelegationOptions,
77129
neutron_target_spec: NeutronTargetSpec,
78130
parameters_mapping: dict[str, Parameter],
79131
) -> bool:
@@ -82,7 +134,10 @@ def supports_partitioning_result(
82134
# Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
83135
# and at the same time the node does not satisfy delegation requirements.
84136
# In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
85-
if bounds in [cls.SUPPORTED_BOUNDS["Relu"], cls.SUPPORTED_BOUNDS["Relu6"]]:
137+
if bounds in [
138+
cls.RELU_COMPATIBLE_BOUNDS["Relu"],
139+
cls.RELU_COMPATIBLE_BOUNDS["Relu6"],
140+
]:
86141
is_alone_in_partition = cls.is_node_alone_in_partition(
87142
node, partition_list, filter_fn=is_not_qdq_node
88143
)
@@ -91,8 +146,21 @@ def supports_partitioning_result(
91146

92147
return True
93148

149+
@staticmethod
def _quantize_value(
    value: int,
    zp: int,
    scale: float,
    quant_min: int,
    quant_max: int,
    dtype: type = np.int8,
) -> np.integer:
    """Quantize a single scalar using the given quantization parameters.

    The value is divided by `scale`, rounded with the built-in `round`, shifted by
    the zero point, clipped into `[quant_min, quant_max]` and finally cast to `dtype`.

    :param value: The scalar to quantize.
    :param zp: Zero point added after rescaling.
    :param scale: Scale the value is divided by.
    :param quant_min: Lower limit of the quantized range.
    :param quant_max: Upper limit of the quantized range.
    :param dtype: Callable used to cast the clipped result (`np.int8` by default).
    :return: The quantized value as an instance of `dtype`.
    """
    shifted = zp + round(value / scale)
    clipped = np.clip(shifted, quant_min, quant_max)
    return dtype(clipped)
160+
94161
def convert(self, node: Node):
95-
"""Convert the `aten.clamp.default` operator to Neutron IR `Relu*` operators.
162+
"""Convert the `aten.clamp.default` operator to either
163+
Neutron IR `Relu*` operator or combination of `Min` and `Max`.
96164
The schema is:
97165
aten::clamp(
98166
Tensor self,
@@ -101,13 +169,81 @@ def convert(self, node: Node):
101169
) -> Tensor
102170
"""
103171
self.assert_convertible(node)
172+
to_relu = _is_convertible_to_relu(node)
104173

105174
bounds = self._get_clamp_bounds(node)
106-
107175
t_op = self._create_tflite_op_with_io_tensors(node)
108176

109-
# noinspection PyTypeChecker,PyUnboundLocalVariable
110-
t_op.opcode_index = self.builder.op_code_index_for_op_type(
111-
self.BOUNDS_TO_NEUTRON_IR_OP[bounds]
177+
if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu:
178+
# noinspection PyTypeChecker,PyUnboundLocalVariable
179+
t_op.opcode_index = self.builder.op_code_index_for_op_type(
180+
self.BOUNDS_TO_RELU_NEUTRON_IR_OP[bounds]
181+
)
182+
self.builder.append_operators([t_op])
183+
return
184+
185+
q_node = node.args[0]
186+
assert _is_dequant_node(q_node)
187+
_, scale, zp, quant_min, quant_max, q_type = q_node.args
188+
q_type = torch_type_to_numpy_type(q_type).type
189+
190+
x = t_op.tmp_inputs[0]
191+
y = t_op.tmp_outputs[0]
192+
193+
if x.quantization is not None and y.quantization is None:
194+
propagate_quantization(x, y)
195+
196+
min_value, max_value = (
197+
v if v is not None and math.isfinite(v) else None for v in bounds
112198
)
113-
self.builder.append_operators([t_op])
199+
200+
if min_value is not None:
201+
min_value = self._quantize_value(
202+
value=min_value,
203+
zp=zp,
204+
scale=scale,
205+
quant_min=quant_min,
206+
quant_max=quant_max,
207+
dtype=q_type,
208+
)
209+
min_tensor = self.builder.create_tensor_for_data(
210+
np.array([min_value], q_type), "min"
211+
)
212+
propagate_quantization(x, min_tensor)
213+
214+
if max_value is not None:
215+
max_value = self._quantize_value(
216+
value=max_value,
217+
zp=zp,
218+
scale=scale,
219+
quant_min=quant_min,
220+
quant_max=quant_max,
221+
dtype=q_type,
222+
)
223+
max_tensor = self.builder.create_tensor_for_data(
224+
np.array([max_value], q_type), "max"
225+
)
226+
propagate_quantization(x, max_tensor)
227+
228+
if None not in [min_value, max_value]:
229+
tmp_y = self.builder.duplicate_tensor(x)
230+
tmp_x = tmp_y
231+
propagate_quantization(x, tmp_y)
232+
else:
233+
tmp_y = y
234+
tmp_x = x
235+
236+
ops_to_add = []
237+
if max_value is not None:
238+
min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum())
239+
min_op.tmp_inputs = [x, max_tensor]
240+
min_op.tmp_outputs = [tmp_y]
241+
ops_to_add.append(min_op)
242+
243+
if min_value is not None:
244+
max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum())
245+
max_op.tmp_inputs = [tmp_x, min_tensor]
246+
max_op.tmp_outputs = [y]
247+
ops_to_add.append(max_op)
248+
249+
self.builder.append_operators(ops_to_add)

backends/nxp/quantizer/patterns.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
from functools import partial
1111

1212
import torch
13+
from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
14+
_is_convertible_to_relu,
15+
)
1316
from executorch.backends.nxp.quantizer.utils import (
1417
get_bias_qparams,
1518
get_bias_qparams_transp_conv,
@@ -115,8 +118,9 @@ class SharedSpecPattern(QuantizationPattern):
115118
def partition_types(self) -> list[torch.nn.Module]:
116119
pass
117120

118-
def get_anchors(
119-
self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
121+
@staticmethod
122+
def get_shared_spec_anchors(
123+
gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
120124
) -> PartitionAnchors | None:
121125
node = fused_partition[0].nodes[-1]
122126
assert len(fused_partition[0].input_nodes) == 1
@@ -137,15 +141,21 @@ def get_anchors(
137141
],
138142
)
139143

144+
def get_anchors(
    self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
) -> PartitionAnchors | None:
    """Return the anchors produced by `get_shared_spec_anchors` for this partition."""
    anchors = self.get_shared_spec_anchors(gm, fused_partition)
    return anchors
148+
140149

141150
class SingleInputBasicPattern(QuantizationPattern):
142151
@abstractmethod
143152
def partition_types(self) -> list[OpOverload]:
144153
pass
145154

146-
def get_anchors(
147-
self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
148-
) -> PartitionAnchors | None:
155+
@staticmethod
156+
def get_single_input_anchors(
157+
gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
158+
):
149159
node = fused_partition[0].nodes[-1]
150160

151161
return PartitionAnchors(
@@ -155,6 +165,11 @@ def get_anchors(
155165
output=[(node,)],
156166
)
157167

168+
def get_anchors(
    self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
) -> PartitionAnchors | None:
    """Return the anchors produced by `get_single_input_anchors` for this partition."""
    anchors = self.get_single_input_anchors(gm, fused_partition)
    return anchors
172+
158173

159174
class BatchNormPattern(QuantizationPattern):
160175
def partition_types(self) -> list[OpOverload]:
@@ -408,12 +423,25 @@ def get_anchors(
408423
)
409424

410425

411-
class ClampPattern(SingleInputBasicPattern):
426+
class ClampPattern(QuantizationPattern):
    """Quantization pattern for the `aten.clamp.default` operator.

    Depending on the target flow and the clamp bounds, the anchors are built either
    by `SharedSpecPattern.get_shared_spec_anchors` or by
    `SingleInputBasicPattern.get_single_input_anchors`.
    """

    def partition_types(self):
        """Return the list of operators handled by this pattern."""
        return [torch.ops.aten.clamp.default]

    def get_anchors(
        self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
    ) -> PartitionAnchors | None:
        """Build the anchors for the clamp node that ends the first fused partition.

        The shared-spec anchors are used when the new Neutron C flow is enabled and
        the clamp is not convertible to a ReLU; the single-input anchors are used in
        every other case.
        """
        clamp_node = fused_partition[0].nodes[-1]
        target_spec = self.neutron_quantizer.neutron_target_spec

        # The ReLU check is evaluated only when the new flow is enabled.
        use_shared_spec = (
            target_spec.use_new_flow_neutron_c
            and not _is_convertible_to_relu(clamp_node)
        )
        build_anchors = (
            SharedSpecPattern.get_shared_spec_anchors
            if use_shared_spec
            else SingleInputBasicPattern.get_single_input_anchors
        )
        return build_anchors(gm, fused_partition)
444+
417445

418446
def _is_batch_norm(node_: Node) -> bool:
419447
return node_.op == "call_function" and node_.target in [

0 commit comments

Comments
 (0)