33# This source code is licensed under the BSD-style license found in the
44# LICENSE file in the root directory of this source tree.
55
6+ import math
7+
8+ import numpy as np
9+ import torch
610from executorch .backends .nxp .backend .edge_helper import try_get_arg
11+ from executorch .backends .nxp .backend .ir .converter .conversion .translator import (
12+ torch_type_to_numpy_type ,
13+ )
714from executorch .backends .nxp .backend .ir .converter .node_converter import (
15+ _is_dequant_node ,
16+ _is_quant_node ,
817 CustomDelegationOptions ,
918 is_not_qdq_node ,
1019 NodeConverter ,
1120)
21+ from executorch .backends .nxp .backend .ir .converter .quantization_utils import (
22+ propagate_quantization ,
23+ )
1224from executorch .backends .nxp .backend .ir .lib .tflite .BuiltinOperator import (
1325 BuiltinOperator ,
1426)
27+ from executorch .backends .nxp .backend .ir .tflite_generator import tflite_model
28+ from executorch .backends .nxp .backend .ir .tflite_generator .builtin_options import (
29+ maximum_options ,
30+ minimum_options ,
31+ )
1532from executorch .backends .nxp .backend .neutron_operator_support import (
1633 activation_supported_on_target ,
1734)
2138from torch .nn import Parameter
2239
2340
def _is_convertible_to_relu(node):
    """Tell whether the clamp bounds of `node` match one of the single-operator ReLU variants.

    The bounds are read via `ClampConverter._get_clamp_bounds`. A bound that is `None` or
    not finite is treated as "no bound" (`None`) before the result is looked up among the
    values of `ClampConverter.RELU_COMPATIBLE_BOUNDS`.

    :param node: Clamp node whose bounds are inspected.
    :return: True if the normalized bounds equal one of the ReLU-compatible pairs.
    """

    def _drop_non_finite(limit):
        # A missing or non-finite limit does not restrict the value on that side.
        if limit is None or not math.isfinite(limit):
            return None
        return limit

    normalized = tuple(
        _drop_non_finite(limit) for limit in ClampConverter._get_clamp_bounds(node)
    )

    # Some specific bounds can be replaced with a single ReLU operator.
    return normalized in ClampConverter.RELU_COMPATIBLE_BOUNDS.values()
50+
51+
2452class ClampConverter (NodeConverter ):
25- SUPPORTED_BOUNDS = {
53+ RELU_COMPATIBLE_BOUNDS = {
2654 "ReluN1To1" : (- 1 , 1 ),
2755 "Relu0To1" : (0 , 1 ),
2856 "Relu6" : (0 , 6 ),
2957 "Relu" : (0 , None ),
3058 }
3159
32- BOUNDS_TO_NEUTRON_IR_OP = {
60+ BOUNDS_TO_RELU_NEUTRON_IR_OP = {
3361 (- 1 , 1 ): BuiltinOperator .RELU_N1_TO_1 ,
3462 (0 , 1 ): BuiltinOperator .RELU_0_TO_1 ,
3563 (0 , 6 ): BuiltinOperator .RELU6 ,
@@ -53,27 +81,56 @@ def _is_supported_in_IR(
5381 # No NeutronIR-specific restrictions.
5482 return True
5583
@staticmethod
def _io_quant_is_same(node: Node):
    """Check that the input `dequantize` and the output `quantize` of `node` use equal parameters.

    The producer of the node's first argument must be a dequantize node and the node's first
    user must be a quantize node. Their positional arguments after the first one (the first
    one is the tensor being (de)quantized) are compared pairwise.

    :param node: Node whose surrounding Q/DQ nodes are inspected.
    :return: True if both neighbours are Q/DQ nodes with pairwise equal parameters. False
             otherwise, including when `node` has no users at all.
    """
    # Only the first user is inspected. A node without users has no output quantization to
    # compare against, so report a mismatch instead of letting `next()` raise StopIteration.
    quant = next(iter(node.users), None)
    if quant is None:
        return False

    dequant = node.args[0]
    if not _is_dequant_node(dequant) or not _is_quant_node(quant):
        return False

    q_params = quant.args[1:]
    dq_params = dequant.args[1:]
    return all(q == dq for q, dq in zip(q_params, dq_params))
98+
@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether the clamp `node` can be delegated for the given target.

    :param node: Clamp node to check.
    :param neutron_target_spec: Target description; only `use_new_flow_neutron_c` is read here.
    :param parameters_mapping: Not used by this check.
    :param custom_delegation_options: Not used by this check.
    :return: False when neither bound is finite. Otherwise, without the new Neutron C flow,
             True only for ReLU-compatible bounds. With the new flow the IO must be quantized
             as int8/uint8, and the node must either be ReLU-compatible or have identical
             input and output quantization.
    """
    bounds = ClampConverter._get_clamp_bounds(node)

    # `convert` drops every non-finite bound (infinities and NaN alike). If nothing is left,
    # there is no operator to emit, so such a node must not be delegated.
    if all(b is None or not math.isfinite(b) for b in bounds):
        return False

    relu_compatible = _is_convertible_to_relu(node)
    if not neutron_target_spec.use_new_flow_neutron_c:
        return relu_compatible

    quant_supported = NodeConverter.uses_quantization_type_for_io(
        node,
        supported_types=[torch.int8, torch.uint8],
        input_indices=[0],
        output_indices=[0],
    )
    if not quant_supported:
        return False

    # We either convert to ReLU -> SingleInputQuantization pattern
    # or we convert to Min/Max, which requires same quantization on
    # both input and output.
    # Short-circuit `or`: the Q/DQ comparison is only needed when the ReLU path is unavailable.
    return relu_compatible or ClampConverter._io_quant_is_same(node)
70127
71128 @classmethod
72129 def supports_partitioning_result (
73130 cls ,
74131 node : Node ,
75132 partition_list : list [Partition ],
76- custom_delegation_options : CustomDelegationOptions ,
133+ _ : CustomDelegationOptions ,
77134 neutron_target_spec : NeutronTargetSpec ,
78135 parameters_mapping : dict [str , Parameter ],
79136 ) -> bool :
@@ -82,7 +139,10 @@ def supports_partitioning_result(
82139 # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
83140 # and at the same time the node does not satisfy delegation requirements.
84141 # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
85- if bounds in [cls .SUPPORTED_BOUNDS ["Relu" ], cls .SUPPORTED_BOUNDS ["Relu6" ]]:
142+ if bounds in [
143+ cls .RELU_COMPATIBLE_BOUNDS ["Relu" ],
144+ cls .RELU_COMPATIBLE_BOUNDS ["Relu6" ],
145+ ]:
86146 is_alone_in_partition = cls .is_node_alone_in_partition (
87147 node , partition_list , filter_fn = is_not_qdq_node
88148 )
@@ -91,8 +151,21 @@ def supports_partitioning_result(
91151
92152 return True
93153
154+ @staticmethod
155+ def _quantize_value (
156+ value : int ,
157+ zp : int ,
158+ scale : float ,
159+ quant_min : int ,
160+ quant_max : int ,
161+ dtype : type = np .int8 ,
162+ ) -> np .integer :
163+ rescaled_value = round (value / scale ) + zp
164+ return dtype (np .clip (rescaled_value , quant_min , quant_max ))
165+
def convert(self, node: Node):
    """Convert the `aten.clamp.default` operator to either
     Neutron IR `Relu*` operator or combination of `Min` and `Max`.
    The schema is:
        aten::clamp(
            Tensor self,
            Scalar? min=None,
            Scalar? max=None
        ) -> Tensor

    Without the new Neutron C flow, or when the bounds match a ReLU variant, a single
    `Relu*` operator is emitted. Otherwise the finite bounds are quantized with the
    parameters of the input dequantize node and applied as `Maximum(x, min)` followed by
    `Minimum(., max)`. This order matches `torch.clamp`, which is `min(max(x, min), max)`
    and therefore yields `max` when `min > max`.
    """
    self.assert_convertible(node)
    to_relu = _is_convertible_to_relu(node)

    # Non-finite bounds do not restrict the value, so they are handled like missing bounds.
    bounds = tuple(
        v if v is not None and math.isfinite(v) else None
        for v in self._get_clamp_bounds(node)
    )
    t_op = self._create_tflite_op_with_io_tensors(node)

    # Clamp convertible to some variant of ReLU
    if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu:
        # noinspection PyTypeChecker,PyUnboundLocalVariable
        t_op.opcode_index = self.builder.op_code_index_for_op_type(
            self.BOUNDS_TO_RELU_NEUTRON_IR_OP[bounds]
        )
        self.builder.append_operators([t_op])
        return

    # Min/Max path: the bounds have to be expressed in the quantized domain of the input.
    q_node = node.args[0]
    assert _is_dequant_node(q_node)
    _, scale, zp, quant_min, quant_max, q_type = q_node.args
    q_type = torch_type_to_numpy_type(q_type).type

    x = t_op.tmp_inputs[0]
    y = t_op.tmp_outputs[0]

    if x.quantization is not None and y.quantization is None:
        propagate_quantization(x, y)

    def create_bound_tensor(bound, name):
        # Quantize one bound and wrap it in a static tensor sharing the quantization of `x`.
        quantized = self._quantize_value(
            value=bound,
            zp=zp,
            scale=scale,
            quant_min=quant_min,
            quant_max=quant_max,
            dtype=q_type,
        )
        tensor = self.builder.create_tensor_for_data(
            np.array([quantized], q_type), name
        )
        propagate_quantization(x, tensor)
        return tensor

    min_value, max_value = bounds
    min_tensor = None if min_value is None else create_bound_tensor(min_value, "min")
    max_tensor = None if max_value is None else create_bound_tensor(max_value, "max")

    # With both bounds present the two operators are chained through an intermediate tensor.
    intermediate = None
    if min_tensor is not None and max_tensor is not None:
        intermediate = self.builder.duplicate_tensor(x)
        propagate_quantization(x, intermediate)

    ops_to_add = []
    current = x

    # Lower bound first, so that the upper bound wins when `min > max` (as in `torch.clamp`).
    if min_tensor is not None:
        max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum())
        max_op.tmp_inputs = [current, min_tensor]
        max_op.tmp_outputs = [y if intermediate is None else intermediate]
        ops_to_add.append(max_op)
        current = max_op.tmp_outputs[0]

    if max_tensor is not None:
        min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum())
        min_op.tmp_inputs = [current, max_tensor]
        min_op.tmp_outputs = [y]
        ops_to_add.append(min_op)

    self.builder.append_operators(ops_to_add)
0 commit comments