Skip to content

Commit 87266c1

Browse files
committed
NXP backend: Enable new Neutron C flow support for Clamp operator
1 parent 00dbb99 commit 87266c1

2 files changed

Lines changed: 191 additions & 20 deletions

File tree

backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py

Lines changed: 157 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,32 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
import math
7+
8+
import numpy as np
9+
import torch
610
from executorch.backends.nxp.backend.edge_helper import try_get_arg
11+
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
12+
torch_type_to_numpy_type,
13+
)
714
from executorch.backends.nxp.backend.ir.converter.node_converter import (
15+
_is_dequant_node,
16+
_is_quant_node,
817
CustomDelegationOptions,
918
is_not_qdq_node,
1019
NodeConverter,
1120
)
21+
from executorch.backends.nxp.backend.ir.converter.quantization_utils import (
22+
propagate_quantization,
23+
)
1224
from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
1325
BuiltinOperator,
1426
)
27+
from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
28+
from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
29+
maximum_options,
30+
minimum_options,
31+
)
1532
from executorch.backends.nxp.backend.neutron_operator_support import (
1633
activation_supported_on_target,
1734
)
@@ -21,15 +38,26 @@
2138
from torch.nn import Parameter
2239

2340

41+
def _is_convertible_to_relu(node):
    """Tell whether the bounds of a clamp node match one of the single-op ReLU variants.

    The bounds are read through `ClampConverter._get_clamp_bounds`. A bound that is
    `None` or not finite is treated as "no bound" (`None`) before the pair is looked
    up among the values of `ClampConverter.RELU_COMPATIBLE_BOUNDS`.

    :param node: The clamp node whose bounds are inspected.
    :return: True if the normalized bounds are one of the ReLU-compatible pairs.
    """
    normalized = []
    for bound in ClampConverter._get_clamp_bounds(node):
        unbounded = bound is None or not math.isfinite(bound)
        normalized.append(None if unbounded else bound)

    # Some specific bounds can be replaced with a single ReLU-like operator.
    return tuple(normalized) in ClampConverter.RELU_COMPATIBLE_BOUNDS.values()
50+
51+
2452
class ClampConverter(NodeConverter):
25-
SUPPORTED_BOUNDS = {
53+
RELU_COMPATIBLE_BOUNDS = {
2654
"ReluN1To1": (-1, 1),
2755
"Relu0To1": (0, 1),
2856
"Relu6": (0, 6),
2957
"Relu": (0, None),
3058
}
3159

32-
BOUNDS_TO_NEUTRON_IR_OP = {
60+
BOUNDS_TO_RELU_NEUTRON_IR_OP = {
3361
(-1, 1): BuiltinOperator.RELU_N1_TO_1,
3462
(0, 1): BuiltinOperator.RELU_0_TO_1,
3563
(0, 6): BuiltinOperator.RELU6,
@@ -53,27 +81,56 @@ def _is_supported_in_IR(
5381
# No NeutronIR-specific restrictions.
5482
return True
5583

84+
@staticmethod
def _io_quant_is_same(node: Node):
    """Check that the node is dequantized on input and re-quantized with the same parameters.

    The producer `node.args[0]` must be a dequantize node and the first user of `node`
    must be a quantize node. Their arguments after the first one (the quantized tensor)
    are then compared pairwise.

    NOTE(review): only the first user of `node` is inspected; any further users are
    ignored -- confirm that a single quantize user is guaranteed at this point.

    :param node: The node whose surrounding dequantize/quantize nodes are compared.
    :return: True if both neighbours exist, have the expected kind and their
        parameters compare equal, False otherwise.
    """
    dequant = node.args[0]
    if not _is_dequant_node(dequant):
        return False

    # A node without users has no output quantization to compare against; `next`
    # with a default avoids leaking a `StopIteration` to the caller in that case.
    quant = next(iter(node.users), None)
    if quant is None or not _is_quant_node(quant):
        return False

    # Skip args[0] (the tensor being (de)quantized) and compare the remaining parameters.
    q_params = quant.args[1:]
    dq_params = dequant.args[1:]
    return all(q == dq for q, dq in zip(q_params, dq_params))
98+
5699
@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether the clamp node can be delegated to the given Neutron target.

    A clamp without any effective bound (both bounds `None` or infinite) is rejected.
    Without the new Neutron C flow, only ReLU-compatible bounds are accepted. With the
    new flow, the input and output must be quantized as int8 or uint8 and the node
    must either be ReLU-compatible or use the same quantization on input and output.

    :param node: The clamp node to check.
    :param neutron_target_spec: Target description; `use_new_flow_neutron_c` selects the rule set.
    :param parameters_mapping: Not used by this check.
    :param custom_delegation_options: Not used by this check.
    :return: True if the node is supported on the target.
    """
    relu_compatible = _is_convertible_to_relu(node)
    bounds = ClampConverter._get_clamp_bounds(node)

    if all(b is None or math.isinf(b) for b in bounds):
        return False

    if neutron_target_spec.use_new_flow_neutron_c:
        quant_supported = NodeConverter.uses_quantization_type_for_io(
            node,
            supported_types=[torch.int8, torch.uint8],
            input_indices=[0],
            output_indices=[0],
        )

        # We either convert to ReLU -> SingleInputQuantization pattern
        # or we convert to Min/Max, which requires same quantization on
        # both input and output. Logical `or` (instead of bitwise `|`) only
        # inspects the surrounding Q/DQ nodes when the ReLU path is unavailable.
        return quant_supported and (
            relu_compatible or ClampConverter._io_quant_is_same(node)
        )

    return relu_compatible
70127

71128
@classmethod
72129
def supports_partitioning_result(
73130
cls,
74131
node: Node,
75132
partition_list: list[Partition],
76-
custom_delegation_options: CustomDelegationOptions,
133+
_: CustomDelegationOptions,
77134
neutron_target_spec: NeutronTargetSpec,
78135
parameters_mapping: dict[str, Parameter],
79136
) -> bool:
@@ -82,7 +139,10 @@ def supports_partitioning_result(
82139
# Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
83140
# and at the same time the node does not satisfy delegation requirements.
84141
# In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
85-
if bounds in [cls.SUPPORTED_BOUNDS["Relu"], cls.SUPPORTED_BOUNDS["Relu6"]]:
142+
if bounds in [
143+
cls.RELU_COMPATIBLE_BOUNDS["Relu"],
144+
cls.RELU_COMPATIBLE_BOUNDS["Relu6"],
145+
]:
86146
is_alone_in_partition = cls.is_node_alone_in_partition(
87147
node, partition_list, filter_fn=is_not_qdq_node
88148
)
@@ -91,8 +151,21 @@ def supports_partitioning_result(
91151

92152
return True
93153

154+
@staticmethod
def _quantize_value(
    value: float,
    zp: int,
    scale: float,
    quant_min: int,
    quant_max: int,
    dtype: type = np.int8,
) -> np.integer:
    """Quantize one real-valued scalar using affine (scale / zero-point) parameters.

    Computes `round(value / scale) + zp`, saturates the result to the range
    `[quant_min, quant_max]` and returns it as an instance of `dtype`.

    :param value: Real value to quantize (an `int` is accepted as well).
    :param zp: Zero point added after rescaling.
    :param scale: Quantization scale the value is divided by.
    :param quant_min: Lowest representable quantized value.
    :param quant_max: Highest representable quantized value.
    :param dtype: NumPy scalar type of the returned value.
    :return: The saturated quantized value as a `dtype` scalar.
    """
    rescaled_value = round(value / scale) + zp

    # Saturate with plain Python integers: `round` yields an arbitrary-precision int,
    # so a very large finite bound stays safe regardless of how NumPy treats integers
    # outside its native range. The result equals `np.clip(..., quant_min, quant_max)`.
    saturated = min(max(rescaled_value, quant_min), quant_max)
    return dtype(saturated)
165+
94166
def convert(self, node: Node):
95-
"""Convert the `aten.clamp.default` operator to Neutron IR `Relu*` operators.
167+
"""Convert the `aten.clamp.default` operator to either a
168+
Neutron IR `Relu*` operator or a combination of `Min` and `Max`.
96169
The schema is:
97170
aten::clamp(
98171
Tensor self,
@@ -101,13 +174,83 @@ def convert(self, node: Node):
101174
) -> Tensor
102175
"""
103176
self.assert_convertible(node)
177+
to_relu = _is_convertible_to_relu(node)
104178

105179
bounds = self._get_clamp_bounds(node)
106-
180+
bounds = tuple(
181+
v if v is not None and math.isfinite(v) else None for v in bounds
182+
)
107183
t_op = self._create_tflite_op_with_io_tensors(node)
108184

109-
# noinspection PyTypeChecker,PyUnboundLocalVariable
110-
t_op.opcode_index = self.builder.op_code_index_for_op_type(
111-
self.BOUNDS_TO_NEUTRON_IR_OP[bounds]
112-
)
113-
self.builder.append_operators([t_op])
185+
# Clamp convertible to some variant of ReLU
186+
if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu:
187+
# noinspection PyTypeChecker,PyUnboundLocalVariable
188+
t_op.opcode_index = self.builder.op_code_index_for_op_type(
189+
self.BOUNDS_TO_RELU_NEUTRON_IR_OP[bounds]
190+
)
191+
self.builder.append_operators([t_op])
192+
return
193+
194+
q_node = node.args[0]
195+
assert _is_dequant_node(q_node)
196+
_, scale, zp, quant_min, quant_max, q_type = q_node.args
197+
q_type = torch_type_to_numpy_type(q_type).type
198+
199+
x = t_op.tmp_inputs[0]
200+
y = t_op.tmp_outputs[0]
201+
202+
if x.quantization is not None and y.quantization is None:
203+
propagate_quantization(x, y)
204+
205+
min_value, max_value = bounds
206+
207+
if min_value is not None:
208+
min_value = self._quantize_value(
209+
value=min_value,
210+
zp=zp,
211+
scale=scale,
212+
quant_min=quant_min,
213+
quant_max=quant_max,
214+
dtype=q_type,
215+
)
216+
min_tensor = self.builder.create_tensor_for_data(
217+
np.array([min_value], q_type), "min"
218+
)
219+
propagate_quantization(x, min_tensor)
220+
221+
if max_value is not None:
222+
max_value = self._quantize_value(
223+
value=max_value,
224+
zp=zp,
225+
scale=scale,
226+
quant_min=quant_min,
227+
quant_max=quant_max,
228+
dtype=q_type,
229+
)
230+
max_tensor = self.builder.create_tensor_for_data(
231+
np.array([max_value], q_type), "max"
232+
)
233+
propagate_quantization(x, max_tensor)
234+
235+
if None not in bounds:
236+
tmp_y = self.builder.duplicate_tensor(x)
237+
tmp_x = tmp_y
238+
propagate_quantization(x, tmp_y)
239+
else:
240+
tmp_y = y
241+
tmp_x = x
242+
243+
ops_to_add = []
244+
if max_value is not None:
245+
min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum())
246+
min_op.tmp_inputs = [x, max_tensor]
247+
min_op.tmp_outputs = [tmp_y]
248+
ops_to_add.append(min_op)
249+
250+
if min_value is not None:
251+
max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum())
252+
max_op.tmp_inputs = [tmp_x, min_tensor]
253+
max_op.tmp_outputs = [y]
254+
ops_to_add.append(max_op)
255+
256+
self.builder.append_operators(ops_to_add)

backends/nxp/quantizer/patterns.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
from functools import partial
1111

1212
import torch
13+
from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
14+
_is_convertible_to_relu,
15+
)
1316
from executorch.backends.nxp.quantizer.utils import (
1417
get_bias_qparams,
1518
get_bias_qparams_transp_conv,
@@ -114,8 +117,9 @@ class SharedSpecPattern(QuantizationPattern):
114117
def partition_types(self) -> list[torch.nn.Module]:
115118
pass
116119

117-
def get_anchors(
118-
self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
120+
@staticmethod
121+
def get_shared_spec_anchors(
122+
gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
119123
) -> PartitionAnchors | None:
120124
node = fused_partition[0].nodes[-1]
121125
assert len(fused_partition[0].input_nodes) == 1
@@ -136,15 +140,21 @@ def get_anchors(
136140
],
137141
)
138142

143+
def get_anchors(
    self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
) -> PartitionAnchors | None:
    """Return the anchors for this pattern by delegating to `get_shared_spec_anchors`.

    :param gm: Graph module forwarded unchanged to the helper.
    :param fused_partition: Matched partition forwarded unchanged to the helper.
    :return: Whatever `get_shared_spec_anchors` returns for the partition.
    """
    anchors = self.get_shared_spec_anchors(gm, fused_partition)
    return anchors
147+
139148

140149
class SingleInputBasicPattern(QuantizationPattern):
141150
@abstractmethod
142151
def partition_types(self) -> list[OpOverload]:
143152
pass
144153

145-
def get_anchors(
146-
self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
147-
) -> PartitionAnchors | None:
154+
@staticmethod
155+
def get_single_input_anchors(
156+
gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
157+
):
148158
node = fused_partition[0].nodes[-1]
149159

150160
return PartitionAnchors(
@@ -154,6 +164,11 @@ def get_anchors(
154164
output=[(node,)],
155165
)
156166

167+
def get_anchors(
    self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
) -> PartitionAnchors | None:
    """Return the anchors for this pattern by delegating to `get_single_input_anchors`.

    :param gm: Graph module forwarded unchanged to the helper.
    :param fused_partition: Matched partition forwarded unchanged to the helper.
    :return: Whatever `get_single_input_anchors` returns for the partition.
    """
    anchors = self.get_single_input_anchors(gm, fused_partition)
    return anchors
171+
157172

158173
class BatchNormPattern(QuantizationPattern):
159174
def partition_types(self) -> list[OpOverload]:
@@ -408,7 +423,7 @@ def get_anchors(
408423
)
409424

410425

411-
class ClampPattern(SingleInputBasicPattern):
426+
class ClampPattern(QuantizationPattern):
412427
"""Quantizer for the `aten.clamp.default` operator."""
413428

414429
def __init__(self, neutron_quantizer, is_qat=False):
@@ -418,6 +433,19 @@ def __init__(self, neutron_quantizer, is_qat=False):
418433
def partition_types(self):
    """Return the operator overloads matched by this pattern (only `aten.clamp.default`)."""
    clamp_overload = torch.ops.aten.clamp.default
    return [clamp_overload]
420435

436+
def get_anchors(
    self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
) -> PartitionAnchors | None:
    """Pick the anchor layout for a clamp partition.

    The last node of the first fused partition is taken as the clamp node. When the
    target uses the new Neutron C flow and the clamp is not convertible to a ReLU
    variant, the shared-spec anchors are used; in every other case the single-input
    anchors are used.

    :param gm: Graph module forwarded to the selected anchor helper.
    :param fused_partition: Matched partition; its first entry provides the clamp node.
    :return: Anchors produced by the selected helper.
    """
    clamp_node = fused_partition[0].nodes[-1]
    target_spec = self.neutron_quantizer.neutron_target_spec

    # The ReLU check is only evaluated when the new flow is enabled (short-circuit).
    needs_shared_spec = (
        target_spec.use_new_flow_neutron_c
        and not _is_convertible_to_relu(clamp_node)
    )
    if needs_shared_spec:
        return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition)

    return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition)
448+
421449

422450
def _is_batch_norm(node_: Node) -> bool:
423451
return node_.op == "call_function" and node_.target in [

0 commit comments

Comments
 (0)