Skip to content

Commit 3a5a496

Browse files
committed
NXP backend: Enable new Neutron C flow support for Clamp operator
1 parent aa85afe commit 3a5a496

2 files changed

Lines changed: 171 additions & 22 deletions

File tree

backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py

Lines changed: 137 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,29 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
from copy import copy
7+
8+
import numpy as np
9+
import torch
610
from executorch.backends.nxp.backend.edge_helper import try_get_arg
711
from executorch.backends.nxp.backend.ir.converter.node_converter import (
12+
_is_dequant_node,
13+
_is_quant_node,
814
CustomDelegationOptions,
915
is_not_qdq_node,
1016
NodeConverter,
1117
)
18+
from executorch.backends.nxp.backend.ir.converter.quantization_utils import (
19+
propagate_quantization,
20+
)
1221
from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
1322
BuiltinOperator,
1423
)
24+
from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
25+
from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
26+
maximum_options,
27+
minimum_options,
28+
)
1529
from executorch.backends.nxp.backend.neutron_operator_support import (
1630
activation_supported_on_target,
1731
)
@@ -21,15 +35,25 @@
2135
from torch.nn import Parameter
2236

2337

38+
def _is_convertible_to_relu(node):
    """Tell whether the clamp `node` can be converted to a single `Relu*` operator.

    :param node: Node whose clamp bounds are read via `ClampConverter._get_clamp_bounds`.
    :return: True if the bounds are one of `ClampConverter.RELU_COMPATIBLE_BOUNDS`,
             False otherwise.
    """
    clamp_bounds = ClampConverter._get_clamp_bounds(node)

    # Only some specific bounds are supported on the target hardware.
    return clamp_bounds in ClampConverter.RELU_COMPATIBLE_BOUNDS.values()
46+
47+
2448
class ClampConverter(NodeConverter):
25-
SUPPORTED_BOUNDS = {
49+
RELU_COMPATIBLE_BOUNDS = {
2650
"ReluN1To1": (-1, 1),
2751
"Relu0To1": (0, 1),
2852
"Relu6": (0, 6),
2953
"Relu": (0, None),
3054
}
3155

32-
BOUNDS_TO_NEUTRON_IR_OP = {
56+
BOUNDS_TO_RELU_NEUTRON_IR_OP = {
3357
(-1, 1): BuiltinOperator.RELU_N1_TO_1,
3458
(0, 1): BuiltinOperator.RELU_0_TO_1,
3559
(0, 6): BuiltinOperator.RELU6,
@@ -53,27 +77,52 @@ def _is_supported_in_IR(
5377
# No NeutronIR-specific restrictions.
5478
return True
5579

80+
@staticmethod
def _io_quant_is_same(node: Node) -> bool:
    """Check whether `node` is dequantized and re-quantized with identical parameters.

    The input producer (`node.args[0]`) must be a dequantize node and the first
    user of `node` must be a quantize node. Their arguments after the first one
    are compared pairwise.

    :param node: Node whose surrounding dequantize/quantize nodes are inspected.
    :return: True if both nodes exist, have the expected kind and carry equal
             parameters. False otherwise, including when `node` has no users or
             no arguments.
    """
    # A node without users or arguments has no quantize/dequantize pair to compare.
    if not node.users or not node.args:
        return False

    # NOTE(review): only the first user is inspected; confirm that a node with
    #  several users cannot reach this check with differing quantize nodes.
    quant = next(iter(node.users))
    dequant = node.args[0]

    if not _is_dequant_node(dequant):
        return False

    if not _is_quant_node(quant):
        return False

    q_params = quant.args[1:]
    dq_params = dequant.args[1:]

    # `zip` would silently ignore surplus parameters, so require equal lengths.
    if len(q_params) != len(dq_params):
        return False

    return all(q == dq for q, dq in zip(q_params, dq_params))
94+
5695
@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether the clamp `node` can be delegated to the target.

    Without the new Neutron C flow, only clamps whose bounds map to a `Relu*`
    operator are supported. With the new flow, the input and output must use
    int8 or uint8 quantization, and the node must either be ReLU-compatible or
    have identical input and output quantization.

    :param node: The clamp node to check.
    :param neutron_target_spec: Target specification; its `use_new_flow_neutron_c`
                                flag selects the rule set.
    :param parameters_mapping: Unused here.
    :param custom_delegation_options: Unused here.
    :return: True if the node is supported on the target.
    """
    relu_compatible = _is_convertible_to_relu(node)

    if not neutron_target_spec.use_new_flow_neutron_c:
        return relu_compatible

    quant_supported = NodeConverter.uses_quantization_type_for_io(
        node,
        supported_types=[torch.int8, torch.uint8],
        input_indices=[0],
        output_indices=[0],
    )
    if not quant_supported:
        return False

    # We either convert to ReLU -> SingleInputQuantization pattern
    # or we convert to Min/Max, which requires same quantization on
    # both input and output. The logical `or` short-circuits, so the
    # quantization comparison only runs when it can affect the result.
    return relu_compatible or ClampConverter._io_quant_is_same(node)
70119

71120
@classmethod
72121
def supports_partitioning_result(
73122
cls,
74123
node: Node,
75124
partition_list: list[Partition],
76-
custom_delegation_options: CustomDelegationOptions,
125+
_: CustomDelegationOptions,
77126
neutron_target_spec: NeutronTargetSpec,
78127
parameters_mapping: dict[str, Parameter],
79128
) -> bool:
@@ -82,7 +131,10 @@ def supports_partitioning_result(
82131
# Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
83132
# and at the same time the node does not satisfy delegation requirements.
84133
# In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
85-
if bounds in [cls.SUPPORTED_BOUNDS["Relu"], cls.SUPPORTED_BOUNDS["Relu6"]]:
134+
if bounds in [
135+
cls.RELU_COMPATIBLE_BOUNDS["Relu"],
136+
cls.RELU_COMPATIBLE_BOUNDS["Relu6"],
137+
]:
86138
is_alone_in_partition = cls.is_node_alone_in_partition(
87139
node, partition_list, filter_fn=is_not_qdq_node
88140
)
@@ -91,8 +143,21 @@ def supports_partitioning_result(
91143

92144
return True
93145

146+
@staticmethod
147+
def _quantize_value(
148+
value: int,
149+
zp: int,
150+
scale: float,
151+
quant_min: int,
152+
quant_max: int,
153+
dtype: type = np.int8,
154+
) -> np.integer:
155+
rescaled_value = round(value / scale) + zp
156+
return dtype(np.clip(rescaled_value, quant_min, quant_max))
157+
94158
def convert(self, node: Node):
95-
"""Convert the `aten.clamp.default` operator to Neutron IR `Relu*` operators.
159+
"""Convert the `aten.clamp.default` operator to either
160+
a Neutron IR `Relu*` operator or a combination of `Min` and `Max` operators.
96161
The schema is:
97162
aten::clamp(
98163
Tensor self,
@@ -101,13 +166,69 @@ def convert(self, node: Node):
101166
) -> Tensor
102167
"""
103168
self.assert_convertible(node)
169+
to_relu = _is_convertible_to_relu(node)
104170

105171
bounds = self._get_clamp_bounds(node)
106-
107172
t_op = self._create_tflite_op_with_io_tensors(node)
108173

109-
# noinspection PyTypeChecker,PyUnboundLocalVariable
110-
t_op.opcode_index = self.builder.op_code_index_for_op_type(
111-
self.BOUNDS_TO_NEUTRON_IR_OP[bounds]
112-
)
113-
self.builder.append_operators([t_op])
174+
if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu:
175+
# noinspection PyTypeChecker,PyUnboundLocalVariable
176+
t_op.opcode_index = self.builder.op_code_index_for_op_type(
177+
self.BOUNDS_TO_RELU_NEUTRON_IR_OP[bounds]
178+
)
179+
self.builder.append_operators([t_op])
180+
return
181+
182+
q_node = node.args[0]
183+
assert _is_dequant_node(q_node)
184+
_, scale, zp, quant_min, quant_max, _ = q_node.args
185+
186+
x = t_op.tmp_inputs[0]
187+
y = t_op.tmp_outputs[0]
188+
189+
if x.quantization is not None and y.quantization is None:
190+
propagate_quantization(x, y)
191+
192+
if x.quantization != y.quantization:
193+
raise AssertionError(
194+
"Input and output quantization should be same in order to convert to max/min."
195+
)
196+
197+
min_value, max_value = bounds
198+
199+
if min_value is not None:
200+
min_value = self._quantize_value(min_value, zp, scale, quant_min, quant_max)
201+
min_tensor = self.builder.create_tensor_for_data(
202+
np.array([min_value], np.int8), "min"
203+
)
204+
propagate_quantization(x, min_tensor)
205+
206+
if max_value is not None:
207+
max_value = self._quantize_value(max_value, zp, scale, quant_min, quant_max)
208+
max_tensor = self.builder.create_tensor_for_data(
209+
np.array([max_value], np.int8), "max"
210+
)
211+
propagate_quantization(x, max_tensor)
212+
213+
if None not in bounds:
214+
tmp_y = self.builder.duplicate_tensor(x)
215+
tmp_x = tmp_y
216+
propagate_quantization(x, tmp_y)
217+
else:
218+
tmp_y = y
219+
tmp_x = x
220+
221+
ops_to_add = []
222+
if max_value is not None:
223+
min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum())
224+
min_op.tmp_inputs = [x, max_tensor]
225+
min_op.tmp_outputs = [tmp_y]
226+
ops_to_add.append(min_op)
227+
228+
if min_value is not None:
229+
max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum())
230+
max_op.tmp_inputs = [tmp_x, min_tensor]
231+
max_op.tmp_outputs = [y]
232+
ops_to_add.append(max_op)
233+
234+
self.builder.append_operators(ops_to_add)

backends/nxp/quantizer/patterns.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
from functools import partial
1111

1212
import torch
13+
from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
14+
_is_convertible_to_relu,
15+
)
1316
from executorch.backends.nxp.quantizer.utils import (
1417
get_bias_qparams,
1518
get_bias_qparams_transp_conv,
@@ -115,8 +118,9 @@ class SharedSpecPattern(QuantizationPattern):
115118
def partition_types(self) -> list[torch.nn.Module]:
116119
pass
117120

118-
def get_anchors(
119-
self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
121+
@staticmethod
122+
def get_shared_spec_anchors(
123+
gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
120124
) -> PartitionAnchors | None:
121125
node = fused_partition[0].nodes[-1]
122126
assert len(fused_partition[0].input_nodes) == 1
@@ -137,15 +141,21 @@ def get_anchors(
137141
],
138142
)
139143

144+
def get_anchors(
    self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
) -> PartitionAnchors | None:
    """Return the anchors produced by `get_shared_spec_anchors` for this partition."""
    anchors = self.get_shared_spec_anchors(gm, fused_partition)
    return anchors
148+
140149

141150
class SingleInputBasicPattern(QuantizationPattern):
142151
@abstractmethod
143152
def partition_types(self) -> list[OpOverload]:
144153
pass
145154

146-
def get_anchors(
147-
self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
148-
) -> PartitionAnchors | None:
155+
@staticmethod
156+
def get_single_input_anchors(
157+
gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
158+
):
149159
node = fused_partition[0].nodes[-1]
150160

151161
return PartitionAnchors(
@@ -155,6 +165,11 @@ def get_anchors(
155165
output=[(node,)],
156166
)
157167

168+
def get_anchors(
    self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
) -> PartitionAnchors | None:
    """Return the anchors produced by `get_single_input_anchors` for this partition."""
    anchors = self.get_single_input_anchors(gm, fused_partition)
    return anchors
172+
158173

159174
class BatchNormPattern(QuantizationPattern):
160175
def partition_types(self) -> list[OpOverload]:
@@ -408,12 +423,25 @@ def get_anchors(
408423
)
409424

410425

411-
class ClampPattern(SingleInputBasicPattern):
426+
class ClampPattern(QuantizationPattern):
    """Quantizer for the `aten.clamp.default` operator.

    The anchors depend on the target flow. With the new Neutron C flow, a clamp
    that is not convertible to ReLU gets the shared-spec anchors. Every other
    clamp gets the single-input anchors.
    """

    def partition_types(self):
        """Return the operators handled by this pattern."""
        return [torch.ops.aten.clamp.default]

    def get_anchors(
        self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
    ) -> PartitionAnchors | None:
        """Select the anchors for the clamp node ending the first fused partition."""
        clamp_node = fused_partition[0].nodes[-1]

        # The ReLU check is only evaluated when the new flow is enabled.
        needs_shared_spec = (
            self.neutron_quantizer.neutron_target_spec.use_new_flow_neutron_c
            and not _is_convertible_to_relu(clamp_node)
        )

        if not needs_shared_spec:
            return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition)

        return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition)
444+
417445

418446
def _is_batch_norm(node_: Node) -> bool:
419447
return node_.op == "call_function" and node_.target in [

0 commit comments

Comments
 (0)