Arm backend: Rework reporting of qspecs and qconfigs (#19016)

martinlsm · Copilot · web-flow · commit 48ec3fc415f5 · 2026-04-22T13:42:35.000+02:00
This PR contains four commits:

1. **Move quantizer_reporter out from cortex_m.quantizer**
When importing `backends.cortex_m.quantizer.quantizer_reporter` from
`backends.arm.quantizer.arm_quantizer_utils`, a cyclic dependency chain
is formed. The problem is that importing quantizer_reporter triggers
`backends/cortex_m/quantizer/__init__.py`, which in turn has imports
leading back to the Arm backend. To fix this problem, move
quantizer_reporter to backends/cortex_m so it can be imported without
forming any cycle.

2. **Arm backend: Remove _QuantizerReporterUserMixin**
_QuantizerReporterUserMixin was a duplicate of the class
QuantizerReporterUser. Remove the former and use the latter instead.

3. **Arm backend: Add label attribute to QuantizationConfig**
The quantizer reporter logs the quantization config in a human-readable
format. Prior to this patch, this was done with the help of a dict
called `SUPPORTED_QCONFIGS`, which was defined in quantizer_reporter.py
and populated by the user. This patch reworks this concept by instead
adding a label attribute to `QuantizationConfig` that the reporter can
use to print the config in a human-readable format.

4. **Arm backend: Rework reporting of qspecs**
The quantization reporter prints quantization specs in human-readable
format. Prior to this patch, this was implemented such that
quantizer_reporter.py defined a dict `SUPPORTED_QSPECS` which was
populated by the user. This dict would map qspec objects to string
representations. This patch removes this dict and instead modifies
the helper function `_qspec_repr` to return a compact string
representation based on the attributes of the qspec.

Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
@@ -39,11 +39,7 @@
 )
 from executorch.backends.cortex_m.quantizer.pattern_matcher import PatternMatcher
 
-from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
-    QuantizerReporter,
-    SUPPORTED_QCONFIGS,
-    SUPPORTED_QSPECS,
-)
+from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter
 
 from torch._ops import OpOverload
 
@@ -219,20 +215,28 @@ def get_symmetric_quantization_config(
         bias_quantization_spec = _get_int32_bias_qspec
 
     if is_dynamic:
-        quantization_config = TOSAQuantizationConfig(
-            act_quantization_spec,
-            None,
-            weight_quantization_spec,
-            bias_quantization_spec,
-        )
+        output_activation = None
     else:
-        quantization_config = TOSAQuantizationConfig(
-            act_quantization_spec,
-            act_quantization_spec,
-            weight_quantization_spec,
-            bias_quantization_spec,
-        )
-    return quantization_config
+        output_activation = act_quantization_spec
+
+    module_name = __name__.rsplit(".", maxsplit=1)[-1]
+    label = (
+        f"{module_name}.get_symmetric_quantization_config("
+        f"per_channel={int(is_per_channel)}, "
+        f"qat={int(is_qat)}, "
+        f"dynamic={int(is_dynamic)}, "
+        f"act_range=[{act_qmin}, {act_qmax}], "
+        f"weight_range=[{weight_qmin}, {weight_qmax}]"
+        ")"
+    )
+
+    return TOSAQuantizationConfig(
+        act_quantization_spec,
+        output_activation,
+        weight_quantization_spec,
+        bias_quantization_spec,
+        label,
+    )
 
 
 @functools.lru_cache
@@ -357,59 +361,32 @@ def get_symmetric_a16w8_quantization_config(
         is_qat=is_qat,
         is_dynamic=is_dynamic,
     )
-    # Replace activation quantization spec with 16-bit version
+
     if is_dynamic:
-        quantization_config = TOSAQuantizationConfig(
-            act_quantization_spec,  # 16-bit input activations
-            None,
-            base_config.weight,  # 8-bit weights from base config
-            base_config.bias,  # bias from base config
-        )
+        output_activation = None
     else:
-        quantization_config = TOSAQuantizationConfig(
-            act_quantization_spec,  # 16-bit input activations
-            act_quantization_spec,  # 16-bit output activations
-            base_config.weight,  # 8-bit weights from base config
-            base_config.bias,  # bias from base config
-        )
-    return quantization_config
-
+        output_activation = act_quantization_spec
+
+    module_name = __name__.rsplit(".", maxsplit=1)[-1]
+    label = (
+        f"{module_name}.get_symmetric_a16w8_quantization_config("
+        f"per_channel={int(is_per_channel)}, "
+        f"qat={int(is_qat)}, "
+        f"dynamic={int(is_dynamic)}, "
+        f"act_range=[{act_quantization_spec.quant_min}, {act_quantization_spec.quant_max}], "
+        f"weight_range=[{weight_qmin}, {weight_qmax}]"
+        ")"
+    )
 
-# Register supported quantization configs and qspecs in the reporter for human-readable reporting
-# MLETORCH-1854: Temporary solution, refactor to automatically register these instead
-_symmetric_a8w4_config_per_channel = get_symmetric_a8w4_quantization_config()
-_symmetric_a8w8_config_per_channel = get_symmetric_quantization_config()
-_symmetric_a16w8_config_per_channel = get_symmetric_a16w8_quantization_config()
-_symmetric_a8w4_config_per_tensor = get_symmetric_a8w4_quantization_config(
-    is_per_channel=False
-)
-_symmetric_a8w8_config_per_tensor = get_symmetric_quantization_config(
-    is_per_channel=False
-)
-_symmetric_a16w8_config_per_tensor = get_symmetric_a16w8_quantization_config(
-    is_per_channel=False
-)
-SUPPORTED_QCONFIGS.update(
-    {
-        _symmetric_a8w8_config_per_channel: f"{__name__}.get_symmetric_quantization_config(is_per_channel=True)",
-        _symmetric_a16w8_config_per_channel: f"{__name__}.get_symmetric_a16w8_quantization_config(is_per_channel=True)",
-        _symmetric_a8w4_config_per_channel: f"{__name__}.get_symmetric_a8w4_quantization_config(is_per_channel=True)",
-        _symmetric_a8w8_config_per_tensor: f"{__name__}.get_symmetric_quantization_config(is_per_channel=False)",
-        _symmetric_a16w8_config_per_tensor: f"{__name__}.get_symmetric_a16w8_quantization_config(is_per_channel=False)",
-        _symmetric_a8w4_config_per_tensor: f"{__name__}.get_symmetric_a8w4_quantization_config(is_per_channel=False)",
-    }
-)
+    # Replace activation quantization spec with 16-bit version
+    return TOSAQuantizationConfig(
+        act_quantization_spec,  # 16-bit input activations
+        output_activation,
+        base_config.weight,  # 8-bit weights from base config
+        base_config.bias,  # bias from base config
+        label,
+    )
 
-SUPPORTED_QSPECS.update(
-    {
-        _symmetric_a8w4_config_per_channel.get_weight_qspec(): "INT4_PER_CHANNEL_QSPEC",
-        _symmetric_a8w8_config_per_channel.get_weight_qspec(): "INT8_PER_CHANNEL_QSPEC",
-        _symmetric_a8w8_config_per_tensor.get_weight_qspec(): "INT8_PER_TENSOR_QSPEC",
-        _symmetric_a8w4_config_per_tensor.get_weight_qspec(): "INT4_PER_TENSOR_QSPEC",
-        _symmetric_a8w8_config_per_tensor.get_input_act_qspec(): "INT8_PER_TENSOR_QSPEC",
-        _symmetric_a16w8_config_per_tensor.get_input_act_qspec(): "INT16_PER_TENSOR_QSPEC",
-    }
-)
 
 NodeFilterType = Callable[[Node], bool]
 """Type for a Node Filter used by annotators.
diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py
@@ -21,6 +21,10 @@
 from executorch.backends.arm.common.annotation_meta import ArmAnnotationInfo
 from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
+from executorch.backends.cortex_m.quantizer_reporter import (
+    QuantizerInfo,
+    QuantizerReporterUser,
+)
 from torch.fx import Node
 
 from torchao.quantization.pt2e.quantizer import (
@@ -160,25 +164,6 @@ def _get_int32_per_channel_bias_qspec(node):
     )
 
 
-class _QuantizerReporterUserMixin:
-    def __init__(self):
-        self.reporter = None
-
-    def register_reporter(self, reporter) -> None:
-        self.reporter = reporter
-
-    def report_reject(self, pattern: list[Node], reason: str) -> None:
-        if self.reporter is not None:
-            self.reporter.report_reject(self, pattern, reason)
-
-    def report_accept(self, pattern: list[Node]) -> None:
-        if self.reporter is not None:
-            self.reporter.report_accept(self, pattern)
-
-    def get_quantizer_info(self):
-        raise NotImplementedError("Quantizer must implement get_quantizer_info method.")
-
-
 class PatternCheck:
     """Base class for pattern checks.
 
@@ -248,7 +233,7 @@ def find_nodes(self, model: torch.fx.GraphModule) -> Iterator[Node]:
         pass
 
 
-class PatternQuantizer(Quantizer, _QuantizerReporterUserMixin):
+class PatternQuantizer(Quantizer, QuantizerReporterUser):
     """Quantizes a graph according to an OperatorConfig.
 
     Args:
@@ -265,28 +250,28 @@ def __init__(
         pattern_matcher: "PatternMatcher",
     ) -> None:
         super().__init__()
-        _QuantizerReporterUserMixin.__init__(self)
+        QuantizerReporterUser.__init__(self)
         self.quantization_config: QuantizationConfig | None = quantization_config
         self.node_finder: "NodeFinder" = node_finder
         self.pattern_matcher: "PatternMatcher" = pattern_matcher
 
     def get_quantizer_info(self):
-        from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
-            QuantizerInfo,
-            SUPPORTED_QCONFIGS,
-        )
-
         name = self.__class__.__name__
         targeted_nodes_description = str(self.node_finder)
-        quantization_config_path = SUPPORTED_QCONFIGS.get(
-            self.quantization_config, "UNREGISTERED_QCONFIG"
-        )
+        if self.quantization_config is None:
+            qconfig_label = "NO_QCONFIG"
+        else:
+            qconfig_label = (
+                self.quantization_config.label
+                if self.quantization_config.label is not None
+                else self.quantization_config.__class__.__name__  # no label, fallback to class name
+            )
         support_config_path = self.pattern_matcher.support_dict_name
 
         return QuantizerInfo(
             name,
             targeted_nodes_description,
-            quantization_config_path,
+            qconfig_label,
             support_config_path,
         )
 
@@ -397,7 +382,7 @@ def validate(self, model: torch.fx.GraphModule) -> bool:  # type: ignore[overrid
         return True
 
 
-class SharedQspecQuantizer(Quantizer, _QuantizerReporterUserMixin):
+class SharedQspecQuantizer(Quantizer, QuantizerReporterUser):
     """Assures that specific ops share quantization parameters on all
     inputs/outputs.
     """
@@ -495,7 +480,7 @@ class SharedQspecQuantizer(Quantizer, _QuantizerReporterUserMixin):
 
     def __init__(self, targets: Optional[list[Callable[..., object]]] = None) -> None:
         super().__init__()
-        _QuantizerReporterUserMixin.__init__(self)
+        QuantizerReporterUser.__init__(self)
         if targets is None:
             self.targets = self.SHARED_QSPEC_OPS_DEFAULT
             self.support_config_path = (
@@ -508,18 +493,14 @@ def __init__(self, targets: Optional[list[Callable[..., object]]] = None) -> Non
             )
 
     def get_quantizer_info(self):
-        from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
-            QuantizerInfo,
-        )
-
         name = self.__class__.__name__
         targeted_nodes_description = ""
-        quantization_config_path = "SHARED_QCONFIG"
+        qconfig_label = "shared qparams for connected targeted nodes"
         support_config_path = self.support_config_path
         return QuantizerInfo(
             name,
             targeted_nodes_description,
-            quantization_config_path,
+            qconfig_label,
             support_config_path,
         )
 
diff --git a/backends/arm/quantizer/quantization_config.py b/backends/arm/quantizer/quantization_config.py
@@ -46,6 +46,7 @@ class QuantizationConfig:
     output_activation: Optional[QuantizationSpecBase]
     weight: Optional[QuantizationSpecBase]
     bias: Optional[QuantizationSpecBase] | Callable[[Any], Any]
+    label: Optional[str] = None  # Optional label for debugging/visualization purposes
 
     def get_input_act_qspec(
         self, node: Optional[Node] = None, input_node: Optional[Node] = None
diff --git a/backends/cortex_m/TARGETS b/backends/cortex_m/TARGETS
@@ -0,0 +1,21 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+
+oncall("executorch")
+
+python_library(
+    name = "quantizer_reporter",
+    srcs = [
+        "quantizer_reporter.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//pytorch/ao:torchao",
+        "fbsource//third-party/pypi/tabulate:tabulate",
+    ],
+)
diff --git a/backends/cortex_m/quantizer/TARGETS b/backends/cortex_m/quantizer/TARGETS
@@ -17,7 +17,6 @@ python_library(
         "pattern_matcher.py",
         "quantization_configs.py",
         "quantizer.py",
-        "quantizer_reporter.py",
         "quantizer_support.py",
     ],
     deps = [
@@ -27,6 +26,7 @@ python_library(
         "//executorch/backends/arm/quantizer:arm_quantizer_utils",
         "//executorch/backends/arm/quantizer:quantization_annotator",
         "//executorch/backends/arm/quantizer:quantization_config",
+        "//executorch/backends/cortex_m:quantizer_reporter",
         "//pytorch/ao:torchao",
         "fbsource//third-party/pypi/tabulate:tabulate",
     ],
@@ -42,19 +42,7 @@ python_library(
         "//caffe2:torch",
         "//executorch/backends/arm/quantizer:arm_quantizer_utils",
         "//executorch/backends/arm/quantizer:quantization_config",
+        "//executorch/backends/cortex_m:quantizer_reporter",
         "//pytorch/ao:torchao",
-        ":quantizer_reporter",
-    ],
-)
-
-python_library(
-    name = "quantizer_reporter",
-    srcs = [
-        "quantizer_reporter.py",
-    ],
-    deps = [
-        "//caffe2:torch",
-        "//pytorch/ao:torchao",
-        "fbsource//third-party/pypi/tabulate:tabulate",
     ],
 )
diff --git a/backends/cortex_m/quantizer/quantization_configs.py b/backends/cortex_m/quantizer/quantization_configs.py
@@ -10,10 +10,6 @@
     _get_int32_per_channel_bias_qspec,
 )
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
-from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
-    SUPPORTED_QCONFIGS,
-    SUPPORTED_QSPECS,
-)
 from torch.fx import Node
 from torchao.quantization.pt2e import (
     HistogramObserver,
@@ -156,6 +152,7 @@ def get_bias_qspec(
     INT8_ACTIVATION_PER_TENSOR_QSPEC,
     INT8_WEIGHT_PER_TENSOR_QSPEC,
     _get_int32_bias_qspec,
+    f"{__name__}.INT8_PER_TENSOR_CONFIG",
 )
 
 
@@ -164,25 +161,5 @@ def get_bias_qspec(
     INT8_ACTIVATION_PER_TENSOR_QSPEC,
     INT8_WEIGHT_PER_CHANNEL_QSPEC,
     _get_int32_per_channel_bias_qspec,
-)
-
-
-# Register supported quantization configs and qspecs in the reporter for human-readable reporting
-# MLETORCH-1854: Temporary solution, refactor to automatically register these instead
-SUPPORTED_QCONFIGS.update(
-    {
-        INT8_PER_CHANNEL_CONFIG: f"{__name__}.INT8_PER_CHANNEL_QCONFIG",
-        INT8_PER_TENSOR_CONFIG: f"{__name__}.INT8_PER_TENSOR_QCONFIG",
-    }
-)
-
-SUPPORTED_QSPECS.update(
-    {
-        INT8_ACTIVATION_PER_TENSOR_QSPEC: "INT8_ACTIVATION_PER_TENSOR_QSPEC",
-        INT8_ACTIVATION_PER_CHANNEL_QSPEC: "INT8_ACTIVATION_PER_CHANNEL_QSPEC",
-        INT8_WEIGHT_PER_TENSOR_QSPEC: "INT8_WEIGHT_PER_TENSOR_QSPEC",
-        INT8_WEIGHT_PER_CHANNEL_QSPEC: "INT8_WEIGHT_PER_CHANNEL_QSPEC",
-        INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC: "INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC",
-        SOFTMAX_OUTPUT_FIXED_QSPEC: "SOFTMAX_OUTPUT_FIXED_QSPEC",
-    }
+    f"{__name__}.INT8_PER_CHANNEL_CONFIG",
 )
diff --git a/backends/cortex_m/quantizer/quantizer.py b/backends/cortex_m/quantizer/quantizer.py
@@ -22,13 +22,13 @@
     INT8_PER_CHANNEL_CONFIG,
     INT8_PER_TENSOR_CONFIG,
 )
-from executorch.backends.cortex_m.quantizer.quantizer_reporter import QuantizerReporter
 from executorch.backends.cortex_m.quantizer.quantizer_support import (
     __name__ as cortex_m_quantizer_support_module,
     CONV_OP_PATTERNS,
     CONV_TRANSPOSE_OP_PATTERNS,
     CORTEX_M_QUANTIZER_SUPPORT_DICT,
 )
+from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter
 from torch._ops import OpOverload
 from torch.fx import GraphModule
 from torchao.quantization.pt2e.quantizer import ComposableQuantizer, Quantizer
diff --git a/backends/cortex_m/quantizer_reporter.py b/backends/cortex_m/quantizer_reporter.py
diff --git a/backends/cortex_m/test/misc/test_quantizer_reporter.py b/backends/cortex_m/test/misc/test_quantizer_reporter.py