Skip to content

Commit 48ec3fc

Browse files
martinlsm and Copilot authored
Arm backend: Rework reporting of qspecs and qconfigs (#19016)
This PR contains four commits: 1. **Move quantizer_reporter out from cortex_m.quantizer** When importing `backends.cortex_m.quantizer.quantizer_reporter` from `backends.arm.quantizer.arm_quantizer_utils`, a cyclic dependency chain is formed. The problem is that importing quantizer_reporter triggers `backends/cortex_m/quantizer/__init__.py`, which in turn has imports leading back to the Arm backend. To fix this problem, move quantizer_reporter to backends/cortex_m so it can be imported without forming any cycle. 2. **Arm backend: Remove _QuantizerReporterUserMixin** _QuantizerReporterUserMixin was a duplicate of the class QuantizerReporterUser. Remove the former and use the latter instead. 3. **Arm backend: Add label attribute to QuantizationConfig** The quantizer reporter logs the quantization config in a human-readable format. Prior to this patch, this was done with the help of a dict called `SUPPORTED_QCONFIGS`, which was defined in quantizer_reporter.py and populated by the user. This patch reworks this concept by instead adding a label attribute to `QuantizationConfig` that the reporter can use to print the config in a human-readable format. 4. **Arm backend: Rework reporting of qspecs** The quantization reporter prints quantization specs in a human-readable format. Prior to this patch, this was implemented such that quantizer_reporter.py defined a dict `SUPPORTED_QSPECS` which was populated by the user. This dict would map qspec objects to string representations. This patch removes this dict and instead modifies the helper function `_qspec_repr` to return a compact string representation based on the attributes of the qspec. Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent f9506f8 commit 48ec3fc

9 files changed

Lines changed: 214 additions & 199 deletions

File tree

backends/arm/quantizer/arm_quantizer.py

Lines changed: 44 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,7 @@
3939
)
4040
from executorch.backends.cortex_m.quantizer.pattern_matcher import PatternMatcher
4141

42-
from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
43-
QuantizerReporter,
44-
SUPPORTED_QCONFIGS,
45-
SUPPORTED_QSPECS,
46-
)
42+
from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter
4743

4844
from torch._ops import OpOverload
4945

@@ -219,20 +215,28 @@ def get_symmetric_quantization_config(
219215
bias_quantization_spec = _get_int32_bias_qspec
220216

221217
if is_dynamic:
222-
quantization_config = TOSAQuantizationConfig(
223-
act_quantization_spec,
224-
None,
225-
weight_quantization_spec,
226-
bias_quantization_spec,
227-
)
218+
output_activation = None
228219
else:
229-
quantization_config = TOSAQuantizationConfig(
230-
act_quantization_spec,
231-
act_quantization_spec,
232-
weight_quantization_spec,
233-
bias_quantization_spec,
234-
)
235-
return quantization_config
220+
output_activation = act_quantization_spec
221+
222+
module_name = __name__.rsplit(".", maxsplit=1)[-1]
223+
label = (
224+
f"{module_name}.get_symmetric_quantization_config("
225+
f"per_channel={int(is_per_channel)}, "
226+
f"qat={int(is_qat)}, "
227+
f"dynamic={int(is_dynamic)}, "
228+
f"act_range=[{act_qmin}, {act_qmax}], "
229+
f"weight_range=[{weight_qmin}, {weight_qmax}]"
230+
")"
231+
)
232+
233+
return TOSAQuantizationConfig(
234+
act_quantization_spec,
235+
output_activation,
236+
weight_quantization_spec,
237+
bias_quantization_spec,
238+
label,
239+
)
236240

237241

238242
@functools.lru_cache
@@ -357,59 +361,32 @@ def get_symmetric_a16w8_quantization_config(
357361
is_qat=is_qat,
358362
is_dynamic=is_dynamic,
359363
)
360-
# Replace activation quantization spec with 16-bit version
364+
361365
if is_dynamic:
362-
quantization_config = TOSAQuantizationConfig(
363-
act_quantization_spec, # 16-bit input activations
364-
None,
365-
base_config.weight, # 8-bit weights from base config
366-
base_config.bias, # bias from base config
367-
)
366+
output_activation = None
368367
else:
369-
quantization_config = TOSAQuantizationConfig(
370-
act_quantization_spec, # 16-bit input activations
371-
act_quantization_spec, # 16-bit output activations
372-
base_config.weight, # 8-bit weights from base config
373-
base_config.bias, # bias from base config
374-
)
375-
return quantization_config
376-
368+
output_activation = act_quantization_spec
369+
370+
module_name = __name__.rsplit(".", maxsplit=1)[-1]
371+
label = (
372+
f"{module_name}.get_symmetric_a16w8_quantization_config("
373+
f"per_channel={int(is_per_channel)}, "
374+
f"qat={int(is_qat)}, "
375+
f"dynamic={int(is_dynamic)}, "
376+
f"act_range=[{act_quantization_spec.quant_min}, {act_quantization_spec.quant_max}], "
377+
f"weight_range=[{weight_qmin}, {weight_qmax}]"
378+
")"
379+
)
377380

378-
# Register supported quantization configs and qspecs in the reporter for human-readable reporting
379-
# MLETORCH-1854: Temporary solution, refactor to automatically register these instead
380-
_symmetric_a8w4_config_per_channel = get_symmetric_a8w4_quantization_config()
381-
_symmetric_a8w8_config_per_channel = get_symmetric_quantization_config()
382-
_symmetric_a16w8_config_per_channel = get_symmetric_a16w8_quantization_config()
383-
_symmetric_a8w4_config_per_tensor = get_symmetric_a8w4_quantization_config(
384-
is_per_channel=False
385-
)
386-
_symmetric_a8w8_config_per_tensor = get_symmetric_quantization_config(
387-
is_per_channel=False
388-
)
389-
_symmetric_a16w8_config_per_tensor = get_symmetric_a16w8_quantization_config(
390-
is_per_channel=False
391-
)
392-
SUPPORTED_QCONFIGS.update(
393-
{
394-
_symmetric_a8w8_config_per_channel: f"{__name__}.get_symmetric_quantization_config(is_per_channel=True)",
395-
_symmetric_a16w8_config_per_channel: f"{__name__}.get_symmetric_a16w8_quantization_config(is_per_channel=True)",
396-
_symmetric_a8w4_config_per_channel: f"{__name__}.get_symmetric_a8w4_quantization_config(is_per_channel=True)",
397-
_symmetric_a8w8_config_per_tensor: f"{__name__}.get_symmetric_quantization_config(is_per_channel=False)",
398-
_symmetric_a16w8_config_per_tensor: f"{__name__}.get_symmetric_a16w8_quantization_config(is_per_channel=False)",
399-
_symmetric_a8w4_config_per_tensor: f"{__name__}.get_symmetric_a8w4_quantization_config(is_per_channel=False)",
400-
}
401-
)
381+
# Replace activation quantization spec with 16-bit version
382+
return TOSAQuantizationConfig(
383+
act_quantization_spec, # 16-bit input activations
384+
output_activation,
385+
base_config.weight, # 8-bit weights from base config
386+
base_config.bias, # bias from base config
387+
label,
388+
)
402389

403-
SUPPORTED_QSPECS.update(
404-
{
405-
_symmetric_a8w4_config_per_channel.get_weight_qspec(): "INT4_PER_CHANNEL_QSPEC",
406-
_symmetric_a8w8_config_per_channel.get_weight_qspec(): "INT8_PER_CHANNEL_QSPEC",
407-
_symmetric_a8w8_config_per_tensor.get_weight_qspec(): "INT8_PER_TENSOR_QSPEC",
408-
_symmetric_a8w4_config_per_tensor.get_weight_qspec(): "INT4_PER_TENSOR_QSPEC",
409-
_symmetric_a8w8_config_per_tensor.get_input_act_qspec(): "INT8_PER_TENSOR_QSPEC",
410-
_symmetric_a16w8_config_per_tensor.get_input_act_qspec(): "INT16_PER_TENSOR_QSPEC",
411-
}
412-
)
413390

414391
NodeFilterType = Callable[[Node], bool]
415392
"""Type for a Node Filter used by annotators.

backends/arm/quantizer/arm_quantizer_utils.py

Lines changed: 19 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
from executorch.backends.arm.common.annotation_meta import ArmAnnotationInfo
2222
from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY
2323
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
24+
from executorch.backends.cortex_m.quantizer_reporter import (
25+
QuantizerInfo,
26+
QuantizerReporterUser,
27+
)
2428
from torch.fx import Node
2529

2630
from torchao.quantization.pt2e.quantizer import (
@@ -160,25 +164,6 @@ def _get_int32_per_channel_bias_qspec(node):
160164
)
161165

162166

163-
class _QuantizerReporterUserMixin:
164-
def __init__(self):
165-
self.reporter = None
166-
167-
def register_reporter(self, reporter) -> None:
168-
self.reporter = reporter
169-
170-
def report_reject(self, pattern: list[Node], reason: str) -> None:
171-
if self.reporter is not None:
172-
self.reporter.report_reject(self, pattern, reason)
173-
174-
def report_accept(self, pattern: list[Node]) -> None:
175-
if self.reporter is not None:
176-
self.reporter.report_accept(self, pattern)
177-
178-
def get_quantizer_info(self):
179-
raise NotImplementedError("Quantizer must implement get_quantizer_info method.")
180-
181-
182167
class PatternCheck:
183168
"""Base class for pattern checks.
184169
@@ -248,7 +233,7 @@ def find_nodes(self, model: torch.fx.GraphModule) -> Iterator[Node]:
248233
pass
249234

250235

251-
class PatternQuantizer(Quantizer, _QuantizerReporterUserMixin):
236+
class PatternQuantizer(Quantizer, QuantizerReporterUser):
252237
"""Quantizes a graph according to an OperatorConfig.
253238
254239
Args:
@@ -265,28 +250,28 @@ def __init__(
265250
pattern_matcher: "PatternMatcher",
266251
) -> None:
267252
super().__init__()
268-
_QuantizerReporterUserMixin.__init__(self)
253+
QuantizerReporterUser.__init__(self)
269254
self.quantization_config: QuantizationConfig | None = quantization_config
270255
self.node_finder: "NodeFinder" = node_finder
271256
self.pattern_matcher: "PatternMatcher" = pattern_matcher
272257

273258
def get_quantizer_info(self):
274-
from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
275-
QuantizerInfo,
276-
SUPPORTED_QCONFIGS,
277-
)
278-
279259
name = self.__class__.__name__
280260
targeted_nodes_description = str(self.node_finder)
281-
quantization_config_path = SUPPORTED_QCONFIGS.get(
282-
self.quantization_config, "UNREGISTERED_QCONFIG"
283-
)
261+
if self.quantization_config is None:
262+
qconfig_label = "NO_QCONFIG"
263+
else:
264+
qconfig_label = (
265+
self.quantization_config.label
266+
if self.quantization_config.label is not None
267+
else self.quantization_config.__class__.__name__ # no label, fallback to class name
268+
)
284269
support_config_path = self.pattern_matcher.support_dict_name
285270

286271
return QuantizerInfo(
287272
name,
288273
targeted_nodes_description,
289-
quantization_config_path,
274+
qconfig_label,
290275
support_config_path,
291276
)
292277

@@ -397,7 +382,7 @@ def validate(self, model: torch.fx.GraphModule) -> bool: # type: ignore[overrid
397382
return True
398383

399384

400-
class SharedQspecQuantizer(Quantizer, _QuantizerReporterUserMixin):
385+
class SharedQspecQuantizer(Quantizer, QuantizerReporterUser):
401386
"""Assures that specific ops share quantization parameters on all
402387
inputs/outputs.
403388
"""
@@ -495,7 +480,7 @@ class SharedQspecQuantizer(Quantizer, _QuantizerReporterUserMixin):
495480

496481
def __init__(self, targets: Optional[list[Callable[..., object]]] = None) -> None:
497482
super().__init__()
498-
_QuantizerReporterUserMixin.__init__(self)
483+
QuantizerReporterUser.__init__(self)
499484
if targets is None:
500485
self.targets = self.SHARED_QSPEC_OPS_DEFAULT
501486
self.support_config_path = (
@@ -508,18 +493,14 @@ def __init__(self, targets: Optional[list[Callable[..., object]]] = None) -> Non
508493
)
509494

510495
def get_quantizer_info(self):
511-
from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
512-
QuantizerInfo,
513-
)
514-
515496
name = self.__class__.__name__
516497
targeted_nodes_description = ""
517-
quantization_config_path = "SHARED_QCONFIG"
498+
qconfig_label = "shared qparams for connected targeted nodes"
518499
support_config_path = self.support_config_path
519500
return QuantizerInfo(
520501
name,
521502
targeted_nodes_description,
522-
quantization_config_path,
503+
qconfig_label,
523504
support_config_path,
524505
)
525506

backends/arm/quantizer/quantization_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class QuantizationConfig:
4646
output_activation: Optional[QuantizationSpecBase]
4747
weight: Optional[QuantizationSpecBase]
4848
bias: Optional[QuantizationSpecBase] | Callable[[Any], Any]
49+
label: Optional[str] = None # Optional label for debugging/visualization purposes
4950

5051
def get_input_act_qspec(
5152
self, node: Optional[Node] = None, input_node: Optional[Node] = None

backends/cortex_m/TARGETS

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
8+
9+
oncall("executorch")
10+
11+
python_library(
12+
name = "quantizer_reporter",
13+
srcs = [
14+
"quantizer_reporter.py",
15+
],
16+
deps = [
17+
"//caffe2:torch",
18+
"//pytorch/ao:torchao",
19+
"fbsource//third-party/pypi/tabulate:tabulate",
20+
],
21+
)

backends/cortex_m/quantizer/TARGETS

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ python_library(
1717
"pattern_matcher.py",
1818
"quantization_configs.py",
1919
"quantizer.py",
20-
"quantizer_reporter.py",
2120
"quantizer_support.py",
2221
],
2322
deps = [
@@ -27,6 +26,7 @@ python_library(
2726
"//executorch/backends/arm/quantizer:arm_quantizer_utils",
2827
"//executorch/backends/arm/quantizer:quantization_annotator",
2928
"//executorch/backends/arm/quantizer:quantization_config",
29+
"//executorch/backends/cortex_m:quantizer_reporter",
3030
"//pytorch/ao:torchao",
3131
"fbsource//third-party/pypi/tabulate:tabulate",
3232
],
@@ -42,19 +42,7 @@ python_library(
4242
"//caffe2:torch",
4343
"//executorch/backends/arm/quantizer:arm_quantizer_utils",
4444
"//executorch/backends/arm/quantizer:quantization_config",
45+
"//executorch/backends/cortex_m:quantizer_reporter",
4546
"//pytorch/ao:torchao",
46-
":quantizer_reporter",
47-
],
48-
)
49-
50-
python_library(
51-
name = "quantizer_reporter",
52-
srcs = [
53-
"quantizer_reporter.py",
54-
],
55-
deps = [
56-
"//caffe2:torch",
57-
"//pytorch/ao:torchao",
58-
"fbsource//third-party/pypi/tabulate:tabulate",
5947
],
6048
)

backends/cortex_m/quantizer/quantization_configs.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@
1010
_get_int32_per_channel_bias_qspec,
1111
)
1212
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
13-
from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
14-
SUPPORTED_QCONFIGS,
15-
SUPPORTED_QSPECS,
16-
)
1713
from torch.fx import Node
1814
from torchao.quantization.pt2e import (
1915
HistogramObserver,
@@ -156,6 +152,7 @@ def get_bias_qspec(
156152
INT8_ACTIVATION_PER_TENSOR_QSPEC,
157153
INT8_WEIGHT_PER_TENSOR_QSPEC,
158154
_get_int32_bias_qspec,
155+
f"{__name__}.INT8_PER_TENSOR_CONFIG",
159156
)
160157

161158

@@ -164,25 +161,5 @@ def get_bias_qspec(
164161
INT8_ACTIVATION_PER_TENSOR_QSPEC,
165162
INT8_WEIGHT_PER_CHANNEL_QSPEC,
166163
_get_int32_per_channel_bias_qspec,
167-
)
168-
169-
170-
# Register supported quantization configs and qspecs in the reporter for human-readable reporting
171-
# MLETORCH-1854: Temporary solution, refactor to automatically register these instead
172-
SUPPORTED_QCONFIGS.update(
173-
{
174-
INT8_PER_CHANNEL_CONFIG: f"{__name__}.INT8_PER_CHANNEL_QCONFIG",
175-
INT8_PER_TENSOR_CONFIG: f"{__name__}.INT8_PER_TENSOR_QCONFIG",
176-
}
177-
)
178-
179-
SUPPORTED_QSPECS.update(
180-
{
181-
INT8_ACTIVATION_PER_TENSOR_QSPEC: "INT8_ACTIVATION_PER_TENSOR_QSPEC",
182-
INT8_ACTIVATION_PER_CHANNEL_QSPEC: "INT8_ACTIVATION_PER_CHANNEL_QSPEC",
183-
INT8_WEIGHT_PER_TENSOR_QSPEC: "INT8_WEIGHT_PER_TENSOR_QSPEC",
184-
INT8_WEIGHT_PER_CHANNEL_QSPEC: "INT8_WEIGHT_PER_CHANNEL_QSPEC",
185-
INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC: "INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC",
186-
SOFTMAX_OUTPUT_FIXED_QSPEC: "SOFTMAX_OUTPUT_FIXED_QSPEC",
187-
}
164+
f"{__name__}.INT8_PER_CHANNEL_CONFIG",
188165
)

backends/cortex_m/quantizer/quantizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222
INT8_PER_CHANNEL_CONFIG,
2323
INT8_PER_TENSOR_CONFIG,
2424
)
25-
from executorch.backends.cortex_m.quantizer.quantizer_reporter import QuantizerReporter
2625
from executorch.backends.cortex_m.quantizer.quantizer_support import (
2726
__name__ as cortex_m_quantizer_support_module,
2827
CONV_OP_PATTERNS,
2928
CONV_TRANSPOSE_OP_PATTERNS,
3029
CORTEX_M_QUANTIZER_SUPPORT_DICT,
3130
)
31+
from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter
3232
from torch._ops import OpOverload
3333
from torch.fx import GraphModule
3434
from torchao.quantization.pt2e.quantizer import ComposableQuantizer, Quantizer

0 commit comments

Comments
 (0)