# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
55
6+ from copy import copy
7+
8+ import numpy as np
9+ import torch
610from executorch .backends .nxp .backend .edge_helper import try_get_arg
711from executorch .backends .nxp .backend .ir .converter .node_converter import (
12+ _is_dequant_node ,
13+ _is_quant_node ,
814 CustomDelegationOptions ,
915 is_not_qdq_node ,
1016 NodeConverter ,
1117)
18+ from executorch .backends .nxp .backend .ir .converter .quantization_utils import (
19+ propagate_quantization ,
20+ )
1221from executorch .backends .nxp .backend .ir .lib .tflite .BuiltinOperator import (
1322 BuiltinOperator ,
1423)
24+ from executorch .backends .nxp .backend .ir .tflite_generator import tflite_model
25+ from executorch .backends .nxp .backend .ir .tflite_generator .builtin_options import (
26+ maximum_options ,
27+ minimum_options ,
28+ )
1529from executorch .backends .nxp .backend .neutron_operator_support import (
1630 activation_supported_on_target ,
1731)
2135from torch .nn import Parameter
2236
2337
def _is_convertible_to_relu(node):
    """Tell whether the clamp bounds of `node` match one of the `Relu*` variants.

    The bounds are obtained through `ClampConverter._get_clamp_bounds` and looked up
    among the values of `ClampConverter.RELU_COMPATIBLE_BOUNDS`.
    """
    clamp_bounds = ClampConverter._get_clamp_bounds(node)

    # Only some specific bounds are supported on the target hardware.
    return clamp_bounds in ClampConverter.RELU_COMPATIBLE_BOUNDS.values()
46+
47+
2448class ClampConverter (NodeConverter ):
25- SUPPORTED_BOUNDS = {
49+ RELU_COMPATIBLE_BOUNDS = {
2650 "ReluN1To1" : (- 1 , 1 ),
2751 "Relu0To1" : (0 , 1 ),
2852 "Relu6" : (0 , 6 ),
2953 "Relu" : (0 , None ),
3054 }
3155
32- BOUNDS_TO_NEUTRON_IR_OP = {
56+ BOUNDS_TO_RELU_NEUTRON_IR_OP = {
3357 (- 1 , 1 ): BuiltinOperator .RELU_N1_TO_1 ,
3458 (0 , 1 ): BuiltinOperator .RELU_0_TO_1 ,
3559 (0 , 6 ): BuiltinOperator .RELU6 ,
@@ -53,27 +77,52 @@ def _is_supported_in_IR(
5377 # No NeutronIR-specific restrictions.
5478 return True
5579
80+ @staticmethod
81+ def _io_quant_is_same (node : Node ):
82+ quant = next (iter (node .users .keys ()))
83+ dequant = node .args [0 ]
84+
85+ if not _is_dequant_node (dequant ):
86+ return False
87+
88+ if not _is_quant_node (quant ):
89+ return False
90+
91+ q_params = quant .args [1 :]
92+ dq_params = dequant .args [1 :]
93+ return all ([q == dq for q , dq in zip (q_params , dq_params )])
94+
@staticmethod
def _is_supported_on_target(
    node: Node,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Decide whether the clamp `node` can run on the target.

    Without `neutron_target_spec.use_new_flow_neutron_c` the node is supported exactly
    when its bounds are ReLU compatible. With the new flow, the node must additionally
    use int8 or uint8 quantization on its first input and first output, and it may
    also be accepted with arbitrary bounds when its input and output quantization match.

    `parameters_mapping` and `custom_delegation_options` are not used by this check.
    """
    relu_compatible = _is_convertible_to_relu(node)

    if not neutron_target_spec.use_new_flow_neutron_c:
        return relu_compatible

    io_quant_consistent = ClampConverter._io_quant_is_same(node)
    quant_supported = NodeConverter.uses_quantization_type_for_io(
        node,
        supported_types=[torch.int8, torch.uint8],
        input_indices=[0],
        output_indices=[0],
    )

    # We either convert to ReLU -> SingleInputQuantization pattern
    # or we convert to Min/Max, which requires same quantization on
    # both input and output.
    return (relu_compatible or io_quant_consistent) and quant_supported
70119
71120 @classmethod
72121 def supports_partitioning_result (
73122 cls ,
74123 node : Node ,
75124 partition_list : list [Partition ],
76- custom_delegation_options : CustomDelegationOptions ,
125+ _ : CustomDelegationOptions ,
77126 neutron_target_spec : NeutronTargetSpec ,
78127 parameters_mapping : dict [str , Parameter ],
79128 ) -> bool :
@@ -82,7 +131,10 @@ def supports_partitioning_result(
# Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
# and at the same time the node does not satisfy delegation requirements.
# In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
85- if bounds in [cls .SUPPORTED_BOUNDS ["Relu" ], cls .SUPPORTED_BOUNDS ["Relu6" ]]:
134+ if bounds in [
135+ cls .RELU_COMPATIBLE_BOUNDS ["Relu" ],
136+ cls .RELU_COMPATIBLE_BOUNDS ["Relu6" ],
137+ ]:
86138 is_alone_in_partition = cls .is_node_alone_in_partition (
87139 node , partition_list , filter_fn = is_not_qdq_node
88140 )
@@ -91,8 +143,21 @@ def supports_partitioning_result(
91143
92144 return True
93145
146+ @staticmethod
147+ def _quantize_value (
148+ value : int ,
149+ zp : int ,
150+ scale : float ,
151+ quant_min : int ,
152+ quant_max : int ,
153+ dtype : type = np .int8 ,
154+ ) -> np .integer :
155+ rescaled_value = round (value / scale ) + zp
156+ return dtype (np .clip (rescaled_value , quant_min , quant_max ))
157+
def convert(self, node: Node):
    """Convert the `aten.clamp.default` operator to either
     Neutron IR `Relu*` operator or combination of `Min` and `Max`.

    A single `Relu*` operator is emitted when the new Neutron C flow is disabled or
    when the clamp bounds are ReLU compatible. Otherwise the bounds are quantized with
    the parameters of the preceding dequantize node and the clamp is expressed as
    `Minimum(x, max)` and/or `Maximum(x, min)` on the quantized data.

    The schema is:
        aten::clamp(
            Tensor self,
            Scalar? min=None,
            Scalar? max=None
        ) -> Tensor

    Raises `AssertionError` when the Min/Max path is taken and the input and output
    quantization differ.
    """
    self.assert_convertible(node)
    to_relu = _is_convertible_to_relu(node)

    bounds = self._get_clamp_bounds(node)
    t_op = self._create_tflite_op_with_io_tensors(node)

    if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu:
        # noinspection PyTypeChecker,PyUnboundLocalVariable
        t_op.opcode_index = self.builder.op_code_index_for_op_type(
            self.BOUNDS_TO_RELU_NEUTRON_IR_OP[bounds]
        )
        self.builder.append_operators([t_op])
        return

    q_node = node.args[0]
    assert _is_dequant_node(q_node)
    _, scale, zp, quant_min, quant_max, q_dtype = q_node.args

    # The bound constants must have the same integer type as the quantized data.
    # `_is_supported_on_target` admits both int8 and uint8, so a fixed int8 would
    # overflow for uint8 bounds above 127.
    # NOTE(review): assumes the last dequantize argument is the torch dtype of the
    # quantized data - confirm against the dequantize op schema.
    np_dtype = np.uint8 if q_dtype == torch.uint8 else np.int8

    x = t_op.tmp_inputs[0]
    y = t_op.tmp_outputs[0]

    if x.quantization is not None and y.quantization is None:
        propagate_quantization(x, y)

    if x.quantization != y.quantization:
        raise AssertionError(
            "Input and output quantization should be same in order to convert to max/min."
        )

    min_value, max_value = bounds

    if min_value is not None:
        min_value = self._quantize_value(
            min_value, zp, scale, quant_min, quant_max, np_dtype
        )
        min_tensor = self.builder.create_tensor_for_data(
            np.array([min_value], np_dtype), "min"
        )
        propagate_quantization(x, min_tensor)

    if max_value is not None:
        max_value = self._quantize_value(
            max_value, zp, scale, quant_min, quant_max, np_dtype
        )
        max_tensor = self.builder.create_tensor_for_data(
            np.array([max_value], np_dtype), "max"
        )
        propagate_quantization(x, max_tensor)

    if None not in bounds:
        # Both operators are emitted: `Minimum` writes to an intermediate tensor
        # which `Maximum` then reads.
        tmp_y = self.builder.duplicate_tensor(x)
        tmp_x = tmp_y
        propagate_quantization(x, tmp_y)
    else:
        # Only one operator is emitted: it reads `x` and writes `y` directly.
        tmp_y = y
        tmp_x = x

    ops_to_add = []
    if max_value is not None:
        # The upper bound is enforced by taking the minimum with it.
        min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum())
        min_op.tmp_inputs = [x, max_tensor]
        min_op.tmp_outputs = [tmp_y]
        ops_to_add.append(min_op)

    if min_value is not None:
        # The lower bound is enforced by taking the maximum with it.
        max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum())
        max_op.tmp_inputs = [tmp_x, min_tensor]
        max_op.tmp_outputs = [y]
        ops_to_add.append(max_op)

    self.builder.append_operators(ops_to_add)
0 commit comments