33# This source code is licensed under the BSD-style license found in the
44# LICENSE file in the root directory of this source tree.
55
6+ import math
7+
8+ import numpy as np
9+ import torch
610from executorch .backends .nxp .backend .edge_helper import try_get_arg
11+ from executorch .backends .nxp .backend .ir .converter .conversion .translator import (
12+ torch_type_to_numpy_type ,
13+ )
714from executorch .backends .nxp .backend .ir .converter .node_converter import (
15+ _is_dequant_node ,
16+ _is_quant_node ,
817 CustomDelegationOptions ,
918 is_not_qdq_node ,
1019 NodeConverter ,
1120)
21+ from executorch .backends .nxp .backend .ir .converter .quantization_utils import (
22+ propagate_quantization ,
23+ )
1224from executorch .backends .nxp .backend .ir .lib .tflite .BuiltinOperator import (
1325 BuiltinOperator ,
1426)
27+ from executorch .backends .nxp .backend .ir .tflite_generator import tflite_model
28+ from executorch .backends .nxp .backend .ir .tflite_generator .builtin_options import (
29+ maximum_options ,
30+ minimum_options ,
31+ )
1532from executorch .backends .nxp .backend .neutron_operator_support import (
1633 activation_supported_on_target ,
1734)
2138from torch .nn import Parameter
2239
2340
def _is_convertible_to_relu(node):
    """Tell whether the clamp `node` can be expressed as a single `Relu*` operator.

    The bounds of the clamp are obtained via `ClampConverter._get_clamp_bounds` and
    compared against the pairs stored in `ClampConverter.RELU_COMPATIBLE_BOUNDS`.

    :param node: The clamp node to inspect.
    :return: True if the bounds equal one of the ReLU-compatible pairs, False otherwise.
    """
    clamp_bounds = ClampConverter._get_clamp_bounds(node)

    # Only some specific bounds are supported on the target hardware.
    return clamp_bounds in ClampConverter.RELU_COMPATIBLE_BOUNDS.values()
49+
50+
2451class ClampConverter (NodeConverter ):
25- SUPPORTED_BOUNDS = {
52+ RELU_COMPATIBLE_BOUNDS = {
2653 "ReluN1To1" : (- 1 , 1 ),
2754 "Relu0To1" : (0 , 1 ),
2855 "Relu6" : (0 , 6 ),
2956 "Relu" : (0 , None ),
3057 }
3158
32- BOUNDS_TO_NEUTRON_IR_OP = {
59+ BOUNDS_TO_RELU_NEUTRON_IR_OP = {
3360 (- 1 , 1 ): BuiltinOperator .RELU_N1_TO_1 ,
3461 (0 , 1 ): BuiltinOperator .RELU_0_TO_1 ,
3562 (0 , 6 ): BuiltinOperator .RELU6 ,
@@ -53,27 +80,52 @@ def _is_supported_in_IR(
5380 # No NeutronIR-specific restrictions.
5481 return True
5582
83+ @staticmethod
84+ def _io_quant_is_same (node : Node ):
85+ quant = next (iter (node .users .keys ()))
86+ dequant = node .args [0 ]
87+
88+ if not _is_dequant_node (dequant ):
89+ return False
90+
91+ if not _is_quant_node (quant ):
92+ return False
93+
94+ q_params = quant .args [1 :]
95+ dq_params = dequant .args [1 :]
96+ return all ([q == dq for q , dq in zip (q_params , dq_params )])
97+
@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether the clamp `node` can be delegated to the target.

    Without the new Neutron-C flow only ReLU-compatible bounds are accepted.
    With the new flow the node must use int8/uint8 quantization on its first
    input and output, and must additionally either be ReLU-compatible or have
    identical quantization on its input and output.

    :param node: The clamp node to check.
    :param neutron_target_spec: Target specification; its `use_new_flow_neutron_c`
                                flag selects which rules apply.
    :param parameters_mapping: Unused here.
    :param custom_delegation_options: Unused here.
    :return: True if the node is supported on the target, False otherwise.
    """
    relu_compatible = _is_convertible_to_relu(node)

    if not neutron_target_spec.use_new_flow_neutron_c:
        return relu_compatible

    io_quant_consistent = ClampConverter._io_quant_is_same(node)
    quant_supported = NodeConverter.uses_quantization_type_for_io(
        node,
        supported_types=[torch.int8, torch.uint8],
        input_indices=[0],
        output_indices=[0],
    )

    # We either convert to ReLU -> SingleInputQuantization pattern
    # or we convert to Min/Max, which requires same quantization on
    # both input and output.
    return (relu_compatible or io_quant_consistent) and quant_supported
70122
71123 @classmethod
72124 def supports_partitioning_result (
73125 cls ,
74126 node : Node ,
75127 partition_list : list [Partition ],
76- custom_delegation_options : CustomDelegationOptions ,
128+ _ : CustomDelegationOptions ,
77129 neutron_target_spec : NeutronTargetSpec ,
78130 parameters_mapping : dict [str , Parameter ],
79131 ) -> bool :
@@ -82,7 +134,10 @@ def supports_partitioning_result(
82134 # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
83135 # and at the same time the node does not satisfy delegation requirements.
84136 # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
85- if bounds in [cls .SUPPORTED_BOUNDS ["Relu" ], cls .SUPPORTED_BOUNDS ["Relu6" ]]:
137+ if bounds in [
138+ cls .RELU_COMPATIBLE_BOUNDS ["Relu" ],
139+ cls .RELU_COMPATIBLE_BOUNDS ["Relu6" ],
140+ ]:
86141 is_alone_in_partition = cls .is_node_alone_in_partition (
87142 node , partition_list , filter_fn = is_not_qdq_node
88143 )
@@ -91,8 +146,21 @@ def supports_partitioning_result(
91146
92147 return True
93148
149+ @staticmethod
150+ def _quantize_value (
151+ value : int ,
152+ zp : int ,
153+ scale : float ,
154+ quant_min : int ,
155+ quant_max : int ,
156+ dtype : type = np .int8 ,
157+ ) -> np .integer :
158+ rescaled_value = round (value / scale ) + zp
159+ return dtype (np .clip (rescaled_value , quant_min , quant_max ))
160+
def convert(self, node: Node):
    """Convert the `aten.clamp.default` operator to either a Neutron IR `Relu*`
    operator or a combination of `Minimum` and `Maximum` operators.

    The schema is:
        aten::clamp(
            Tensor self,
            Scalar? min=None,
            Scalar? max=None
        ) -> Tensor

    When the new Neutron-C flow is disabled, or the bounds are ReLU-compatible,
    a single `Relu*` operator is emitted. Otherwise the finite bounds are
    quantized with the parameters of the preceding dequantize node and applied
    via `Minimum` (upper bound) followed by `Maximum` (lower bound).

    :param node: The clamp node to convert.
    """
    self.assert_convertible(node)
    to_relu = _is_convertible_to_relu(node)

    bounds = self._get_clamp_bounds(node)
    t_op = self._create_tflite_op_with_io_tensors(node)

    if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu:
        # noinspection PyTypeChecker,PyUnboundLocalVariable
        t_op.opcode_index = self.builder.op_code_index_for_op_type(
            self.BOUNDS_TO_RELU_NEUTRON_IR_OP[bounds]
        )
        self.builder.append_operators([t_op])
        return

    # The quantization parameters are taken from the dequantize node feeding the clamp.
    q_node = node.args[0]
    assert _is_dequant_node(q_node)
    _, scale, zp, quant_min, quant_max, q_type = q_node.args
    q_type = torch_type_to_numpy_type(q_type).type

    x = t_op.tmp_inputs[0]
    y = t_op.tmp_outputs[0]

    if x.quantization is not None and y.quantization is None:
        propagate_quantization(x, y)

    # Missing or infinite bounds do not constrain the value and are dropped.
    lower, upper = (
        v if v is not None and math.isfinite(v) else None for v in bounds
    )

    def _create_bound_tensor(value, name):
        # Quantize one bound and wrap it in a static tensor sharing the quantization of `x`.
        quantized = self._quantize_value(
            value=value,
            zp=zp,
            scale=scale,
            quant_min=quant_min,
            quant_max=quant_max,
            dtype=q_type,
        )
        tensor = self.builder.create_tensor_for_data(
            np.array([quantized], q_type), name
        )
        propagate_quantization(x, tensor)
        return tensor

    min_tensor = _create_bound_tensor(lower, "min") if lower is not None else None
    max_tensor = _create_bound_tensor(upper, "max") if upper is not None else None

    # With both bounds present, the two operators are chained through an
    # intermediate tensor; with a single bound, the operator maps `x` to `y` directly.
    intermediate = None
    if min_tensor is not None and max_tensor is not None:
        intermediate = self.builder.duplicate_tensor(x)
        propagate_quantization(x, intermediate)

    # NOTE(review): `torch.clamp` returns `max` everywhere when min > max, while
    # Minimum followed by Maximum yields `min` — confirm such bounds cannot reach here.
    ops_to_add = []
    if max_tensor is not None:
        min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum())
        min_op.tmp_inputs = [x, max_tensor]
        min_op.tmp_outputs = [y if intermediate is None else intermediate]
        ops_to_add.append(min_op)

    if min_tensor is not None:
        max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum())
        max_op.tmp_inputs = [x if intermediate is None else intermediate, min_tensor]
        max_op.tmp_outputs = [y]
        ops_to_add.append(max_op)

    # NOTE(review): if neither bound is finite, no operator is emitted and `y`
    # has no producer — confirm such a clamp is rejected before conversion.
    self.builder.append_operators(ops_to_add)
0 commit comments