Commit e712c31
Arm backend: Minor composable_quantizer improvements
- Make sure the root node of a shared cluster is the topologically first node of the model, to avoid a crash in torchao.
- Do not report get_attr nodes as non-annotated, as these are never quantized; reporting them led to unnecessarily long logs.
- Make SharedQuantization logging more to the point: do not warn for multiple users, as this is already reported in the quantizer report, and use one single, simple log message when nodes are left unquantized.
- Make the pre-transform-for-annotation report more minimal and only print it when relevant; update the notebook example accordingly.

Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
Change-Id: I412918235799c99ab4b1b1e1f6412de4c906766f
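The first bullet is easiest to see on a toy graph. Below is a minimal sketch of the root-node selection, not the backend's actual code path; the traced function and the `shared_nodes` cluster are hypothetical stand-ins for the data-shuffling clusters the quantizer groups together:

```python
# Sketch of picking the topologically first node of a cluster as its root.
# Anchoring a shared quantization spec on any other member can reference a
# node that appears after its users, which is the kind of ordering problem
# this commit avoids.
import torch
from torch.fx import symbolic_trace

def f(x):
    return x.reshape(2, 2).transpose(0, 1).flatten()

gm = symbolic_trace(f)

# Hypothetical cluster: every call_method node in this toy graph.
shared_nodes = {n for n in gm.graph.nodes if n.op == "call_method"}

# Same ordering trick as in the patch: index every node by its position in
# the graph, then sort the cluster by that index.
node_order = {node: index for index, node in enumerate(gm.graph.nodes)}
ordered_nodes = sorted(shared_nodes, key=lambda node: node_order.get(node, 0))
root_node = ordered_nodes[0]  # topologically first node of the cluster
print(root_node.name)  # -> reshape
```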
1 parent 1debeb6 commit e712c31

4 files changed, 71 additions & 53 deletions

backends/arm/quantizer/arm_quantizer.py

Lines changed: 19 additions & 4 deletions
```diff
@@ -1109,6 +1109,23 @@ def _remove_annotations(self, model: GraphModule) -> GraphModule:
 
         return model
 
+    def _log_nonquantized_nodes(self, model: GraphModule) -> None:
+        non_quantized_nodes = [
+            n
+            for n in model.graph.nodes
+            if n.meta.get(DISALLOW_TFA_META_KEY, True) and n.op != "get_attr"
+        ]
+        if len(non_quantized_nodes) > 0:
+            msg = """
+----------------------------------------------------------------------------------------------------
+PRE-TRANSFORM FOR ANNOTATION QUANTIZATION REPORT
+----------------------------------------------------------------------------------------------------
+The following nodes are not marked for quantization and will not be decomposed in the transform for annotation pipeline:\n"""
+            for node in non_quantized_nodes:
+                msg += f"    {node.name}\n"
+
+            logger.debug(msg)
+
     def transform_for_annotation(self, model: GraphModule) -> GraphModule:
         # Transform_for_annotation should only decompose ops if quantized, which is
         # indicated either by node.meta['DISALLOW_TFA_META_KEY']==False or no such key
@@ -1121,15 +1138,13 @@ def transform_for_annotation(self, model: GraphModule) -> GraphModule:
         # run to set DISALLOW_TFA_META_KEY for quantized nodes and all nodes missing
         # this key afterwards are set to DISALLOW_TFA_META_KEY=True.
 
-        reporter = QuantizerReporter(
-            self.quantizers, "PRE-TRANSFORM_FOR_ANNOTATION QUANTIZATION REPORT"  # type: ignore[arg-type]
-        )
         model = super().annotate(model)
-        reporter.log_quantizer_report(model)
         for node in model.graph.nodes:
             if DISALLOW_TFA_META_KEY not in node.meta:
                 node.meta[DISALLOW_TFA_META_KEY] = True
 
+        self._log_nonquantized_nodes(model)
+
         pass_manager = ArmPassManager(self.compile_spec)
         transformed_model = pass_manager.transform_for_annotation_pipeline(model)
```

backends/arm/quantizer/arm_quantizer_utils.py

Lines changed: 30 additions & 35 deletions
```diff
@@ -592,46 +592,41 @@ def _annotate_shared_cluster(self, root_node: Node) -> None:
         node_order = {node: index for index, node in enumerate(root_node.graph.nodes)}
         ordered_nodes = sorted(shared_nodes, key=lambda node: node_order.get(node, 0))
 
-        if len(adjacent_qspecs) > 0:
-            if len(adjacent_qspecs) > 1:
-                logger.warning(
-                    f"Multiple adjacent quantization specs found for {', '.join([n.name for n in ordered_nodes])}, all nodes will share the input quantization spec of {root_node.name}."
-                )
+        # Ensure the root node is the first one in the graph.
+        root_node = ordered_nodes[0]
 
+        if len(adjacent_qspecs) > 0:
             root_node_float_inputs = self._get_input_nodes_with_float_output(root_node)
-            if len(root_node_float_inputs) == 0:
-                self.report_reject(
-                    ordered_nodes,
-                    "Couldn't find any floating point input to base shared quantization spec on.",
-                )
-                return
-            root_node_first_input = root_node_float_inputs[0]
-
-            shared_qspec = SharedQuantizationSpec((root_node_first_input, root_node))
-            for node in shared_nodes:
-                input_qspec_map: dict[Node, Optional[QuantizationSpec]] = {
-                    n: shared_qspec  # type: ignore[misc]
-                    for n in self._get_input_nodes_with_float_output(node)
-                }
-                if len(self._get_user_nodes_with_float_input(node)) == 0:
-                    output_qspec = None
-                else:
-                    output_qspec = shared_qspec
-                _mark_node_as_quantized(
-                    node, input_qspec_map, output_qspec, is_quantized=True
+            if len(root_node_float_inputs) > 0:
+
+                root_node_first_input = root_node_float_inputs[0]
+                shared_qspec = SharedQuantizationSpec(
+                    (root_node_first_input, root_node)
                 )
+                for node in shared_nodes:
+                    input_qspec_map: dict[Node, Optional[QuantizationSpec]] = {
+                        n: shared_qspec  # type: ignore[misc]
+                        for n in self._get_input_nodes_with_float_output(node)
+                    }
+                    if len(self._get_user_nodes_with_float_input(node)) == 0:
+                        output_qspec = None
+                    else:
+                        output_qspec = shared_qspec
+                    _mark_node_as_quantized(
+                        node, input_qspec_map, output_qspec, is_quantized=True
+                    )
 
-            root_node.meta[Q_ANNOTATION_KEY].input_qspec_map[root_node_first_input] = (
-                adjacent_qspecs[0]
-            )
-            self.report_accept(ordered_nodes)
+                root_node.meta[Q_ANNOTATION_KEY].input_qspec_map[
+                    root_node_first_input
+                ] = adjacent_qspecs[0]
+                self.report_accept(ordered_nodes)
+                return
 
-        else:
-            self.report_reject(
-                ordered_nodes,
-                "Couldn't find any adjacent quantization spec to base shared quantization spec on. You may however quantize these nodes manually if required.",
-            )
-            return
+        self.report_reject(
+            ordered_nodes,
+            "All inputs and outputs to these nodes are non-quantized.",
+        )
+        return
 
     def annotate(self, model: torch.fx.GraphModule) -> None:  # type: ignore[override]
         for node in model.graph.nodes:
```
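For context, the shared-spec pattern in the rewritten branch can be sketched on a toy two-node cluster. This uses the PT2E quantizer types from `torch.ao.quantization.quantizer` (the backend may import them from torchao), and the literal `"quantization_annotation"` is assumed to be what `Q_ANNOTATION_KEY` resolves to:

```python
# Sketch of anchoring a SharedQuantizationSpec on the (input, root) edge of
# a cluster, so the root's input and every cluster node reuse one set of
# quantization parameters.
import torch
from torch.ao.quantization.quantizer import (
    QuantizationAnnotation,
    SharedQuantizationSpec,
)
from torch.fx import symbolic_trace

def f(x):
    return x.reshape(2, 2).transpose(0, 1)

gm = symbolic_trace(f)
reshape, transpose = (n for n in gm.graph.nodes if n.op == "call_method")

root_node = reshape             # topologically first cluster node
root_input = root_node.args[0]  # the float input feeding the cluster
shared_qspec = SharedQuantizationSpec((root_input, root_node))

# Every node in the cluster shares the root's input quantization params.
for node in (reshape, transpose):
    node.meta["quantization_annotation"] = QuantizationAnnotation(
        input_qspec_map={node.args[0]: shared_qspec},
        output_qspec=shared_qspec,
        _annotated=True,
    )
```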

backends/cortex_m/quantizer_reporter.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -383,7 +383,9 @@ def unannotated_nodes_report(
         non_quantized_nodes: list[Node] = []
     else:
         non_quantized_nodes = [
-            node for node in model.graph.nodes if Q_ANNOTATION_KEY not in node.meta
+            node
+            for node in model.graph.nodes
+            if Q_ANNOTATION_KEY not in node.meta and node.op != "get_attr"
         ]
 
     rows = []
```

examples/arm/quantizer_tutorial.ipynb

Lines changed: 19 additions & 13 deletions
```diff
@@ -167,9 +167,25 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### The quantization report\n",
+    "### Pre-transform for annotation quantization report\n",
+    "Note that two quantization reports are printed in this case; this is because the quantization\n",
+    "annotator is run twice during quantization, for two different purposes:\n",
     "\n",
-    "In the logged quantization report each quantizer has added one header describing targeted nodes, the used quantization config, and the supported operators / operator patterns. \n",
+    "1. Mark nodes to be decomposed by the `transform_for_annotation` pipeline (PRE-TRANSFORM FOR ANNOTATION QUANTIZATION REPORT)\n",
+    "2. Perform the actual quantization after the `transform_for_annotation` pipeline (FINAL QUANTIZATION REPORT)\n",
+    "\n",
+    "Consider for example the `torch.div` operator, which is decomposed into one multiplication and one\n",
+    "reciprocal operator when quantized. Since both ops in the decomposition require different quantization parameters, this decomposition needs to happen before quantization, in the `transform_for_annotation` pipeline, but it\n",
+    "must not happen if the operator is to be kept in float.\n",
+    "\n",
+    "**This is important to be aware of when doing mixed quantization, since it means that for an operator to be fully quantized,\n",
+    "both the original operator and its decomposition need to be targeted.**\n",
+    "\n",
+    "The pre-transform for annotation report prints all nodes which are not marked for decomposition (if any) to make this\n",
+    "easy to get right. If regular full-graph quantization is done, this report is simply skipped.\n",
+    "\n",
+    "### The final quantization report\n",
+    "In the second quantization report, each quantizer has added one header describing the targeted nodes, the used quantization config, and the supported operators / operator patterns. \n",
     "```\n",
     "PatternQuantizer using NodeNameNodeFinder targeting names: conv2d, relu\n",
     "Annotating with executorch.backends.arm.quantizer.arm_quantizer.get_symmetric_quantization_config(is_per_channel=True)\n",
@@ -187,7 +203,7 @@
     "```\n",
     "      NODE NAME    INPUT QSPEC MAP                           OUTPUT QSPEC MAP\n",
     "  --  -----------  ----------------------------------------  ---------------------\n",
-    "  ╒   conv2d       x: INT8_PER_TENSOR_QSPEC                  NO_QSPEC\n",
+    "  ╒   conv2d       x: INT8_PER_TENSOR_QSPEC                  None\n",
     "  |                _param_constant0: INT8_PER_CHANNEL_QSPEC\n",
     "  |                _param_constant1: DERIVED_QSPEC\n",
     "  ╘   relu \n",
@@ -198,16 +214,6 @@
     "many different quantization annotations for different types of tensors; per tensor for\n",
     "activations, per channel for weights, and a special quantization spec for the int32 bias. \n",
     "\n",
-    "### Pre-transform for annotation vs. final quantization report\n",
-    "One important detail is that there are two reports printed, one named PRE-TRANSFORM_FOR_ANNOTATION QUANTIZATION REPORT,\n",
-    "and one named FINAL QUANTIZATION REPORT. This is related to the fact that some operators has to be decomposed before quantization to ensure\n",
-    "that all \"sub operators\" gets quantized properly. As an example, the division operator in the first report\n",
-    "has decomposed into a reciprocal and multiplication operator in the second. Had it not been marked for quantization\n",
-    "in the first step, it would have remained a single division operator.\n",
-    "\n",
-    "**This is important to be aware of when doing mixed quantization since this means that for an operator to be fully quantized,\n",
-    "both the original operator and the decomposition needs to be targeted.**\n",
-    "\n",
     "### SharedQspecQuantizer\n",
     "Last in the report there is always an additional quantizer applied which is not specified by the user, the SharedQspecQuantizer.\n",
     "It handles data shuffling operators without numerical behaviour such as copies and reshapes to ensure that they are quantized with the same qspec as\n",
```
