Skip to content

Commit 3476cd8

Browse files
Martin Lindström
authored and committed
Arm backend: Rework reporting of qspecs
The quantization reporter prints quantization specs in human-readable format. Prior to this patch, this was implemented such that quantizer_reporter.py defined a dict `SUPPORTED_QSPECS` which was populated by the user. This dict would map qspec objects to string representations. This patch removes this dict and instead modifies the helper function `_qspec_repr` to return a compact string representation based on the attributes of the qspec. Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com> Change-Id: I9ccd9127b8c332e7c30662be6986ccad4a38881f
1 parent 4d5269f commit 3476cd8

4 files changed

Lines changed: 117 additions & 88 deletions

File tree

backends/arm/quantizer/arm_quantizer.py

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,7 @@
3939
)
4040
from executorch.backends.cortex_m.quantizer.pattern_matcher import PatternMatcher
4141

42-
from executorch.backends.cortex_m.quantizer_reporter import (
43-
QuantizerReporter,
44-
SUPPORTED_QSPECS,
45-
)
42+
from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter
4643

4744
from torch._ops import OpOverload
4845

@@ -391,32 +388,6 @@ def get_symmetric_a16w8_quantization_config(
391388
)
392389

393390

394-
# Register supported quantization configs and qspecs in the reporter for human-readable reporting
395-
# MLETORCH-1854: Temporary solution, refactor to automatically register these instead
396-
_symmetric_a8w4_config_per_channel = get_symmetric_a8w4_quantization_config()
397-
_symmetric_a8w8_config_per_channel = get_symmetric_quantization_config()
398-
_symmetric_a16w8_config_per_channel = get_symmetric_a16w8_quantization_config()
399-
_symmetric_a8w4_config_per_tensor = get_symmetric_a8w4_quantization_config(
400-
is_per_channel=False
401-
)
402-
_symmetric_a8w8_config_per_tensor = get_symmetric_quantization_config(
403-
is_per_channel=False
404-
)
405-
_symmetric_a16w8_config_per_tensor = get_symmetric_a16w8_quantization_config(
406-
is_per_channel=False
407-
)
408-
409-
SUPPORTED_QSPECS.update(
410-
{
411-
_symmetric_a8w4_config_per_channel.get_weight_qspec(): "INT4_PER_CHANNEL_QSPEC",
412-
_symmetric_a8w8_config_per_channel.get_weight_qspec(): "INT8_PER_CHANNEL_QSPEC",
413-
_symmetric_a8w8_config_per_tensor.get_weight_qspec(): "INT8_PER_TENSOR_QSPEC",
414-
_symmetric_a8w4_config_per_tensor.get_weight_qspec(): "INT4_PER_TENSOR_QSPEC",
415-
_symmetric_a8w8_config_per_tensor.get_input_act_qspec(): "INT8_PER_TENSOR_QSPEC",
416-
_symmetric_a16w8_config_per_tensor.get_input_act_qspec(): "INT16_PER_TENSOR_QSPEC",
417-
}
418-
)
419-
420391
NodeFilterType = Callable[[Node], bool]
421392
"""Type for a Node Filter used by annotators.
422393

backends/cortex_m/quantizer/quantization_configs.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
_get_int32_per_channel_bias_qspec,
1111
)
1212
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
13-
from executorch.backends.cortex_m.quantizer_reporter import SUPPORTED_QSPECS
1413
from torch.fx import Node
1514
from torchao.quantization.pt2e import (
1615
HistogramObserver,
@@ -164,15 +163,3 @@ def get_bias_qspec(
164163
_get_int32_per_channel_bias_qspec,
165164
f"{__name__}.INT8_PER_CHANNEL_CONFIG",
166165
)
167-
168-
169-
SUPPORTED_QSPECS.update(
170-
{
171-
INT8_ACTIVATION_PER_TENSOR_QSPEC: "INT8_ACTIVATION_PER_TENSOR_QSPEC",
172-
INT8_ACTIVATION_PER_CHANNEL_QSPEC: "INT8_ACTIVATION_PER_CHANNEL_QSPEC",
173-
INT8_WEIGHT_PER_TENSOR_QSPEC: "INT8_WEIGHT_PER_TENSOR_QSPEC",
174-
INT8_WEIGHT_PER_CHANNEL_QSPEC: "INT8_WEIGHT_PER_CHANNEL_QSPEC",
175-
INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC: "INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC",
176-
SOFTMAX_OUTPUT_FIXED_QSPEC: "SOFTMAX_OUTPUT_FIXED_QSPEC",
177-
}
178-
)

backends/cortex_m/quantizer_reporter.py

Lines changed: 22 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# LICENSE file in the root directory of this source tree.
55
"""Contains classes for reporting quantization decisions made by Quantizers.
66
7-
Basic useage:
7+
Basic usage:
88
1. Implement the QuantizerReporterUser API for all quantizers intending to use the reporter.
99
2. Instantiate the QuantizerReporter with a list of quantizers to be reported.
1010
3. After annotation, log the report using QuantizerReporter.log_quantizer_report(model).
@@ -17,7 +17,7 @@
1717

1818
import logging
1919
from importlib import import_module
20-
from typing import Callable, cast, Dict, List, NamedTuple, Optional
20+
from typing import Any, Callable, cast, Dict, List, NamedTuple, Optional
2121

2222
from torch.fx import GraphModule, Node
2323
from torchao.quantization.pt2e.quantizer import (
@@ -32,43 +32,29 @@
3232
logger = logging.getLogger(__name__)
3333
tabulate = cast(Callable[..., str], import_module("tabulate").tabulate)
3434

35-
# Look-up dicts used to get human readable names for supported quantization specs
36-
SUPPORTED_QSPECS: dict[QuantizationSpecBase | None, str] = {}
3735

36+
def qspec_repr(qspec: Optional[QuantizationSpecBase]) -> str:
37+
"""Get a human-readable representation of a QuantizationSpec."""
3838

39-
def _qspec_repr(qspec):
40-
"""Get a human readable representation of QuantizationSpecs.
41-
42-
Note that the observer_or_fake_quant_ctr field is created dynamically with
43-
the qspec so two qspecs created at different times will not evaluate as
44-
equal. Therefore a custom comparison is required.
45-
46-
#TODO: Clean up qconfig/ qspec string representation logic in cortex_m/arm
47-
backend.
48-
49-
"""
5039
if isinstance(qspec, SharedQuantizationSpec):
51-
return "SHARED_QSPEC"
40+
return f"SharedQuantizationSpec(edge_or_node={qspec.edge_or_node})"
5241
elif isinstance(qspec, DerivedQuantizationSpec):
53-
return "DERIVED_QSPEC"
54-
elif qspec is None:
55-
return "NO_QSPEC"
42+
return f"DerivedQuantizationSpec(derived_from={qspec.derived_from}, dtype={qspec.dtype})"
5643
elif isinstance(qspec, QuantizationSpec):
57-
for key, val in SUPPORTED_QSPECS.items():
58-
if type(qspec) is not type(key):
59-
continue
60-
if qspec.dtype != key.dtype:
61-
continue
62-
if qspec.quant_min != key.quant_min:
63-
continue
64-
if qspec.quant_max != key.quant_max:
65-
continue
66-
if qspec.qscheme != key.qscheme:
67-
continue
68-
if qspec.is_dynamic != key.is_dynamic:
69-
continue
70-
return val
71-
return "UNREGISTERED_QSPEC"
44+
45+
def _fmt(obj: Any) -> str:
46+
return str(obj).removeprefix("torch.").upper()
47+
48+
q_range_fmt = (
49+
f", range=({qspec.quant_min},{qspec.quant_max})"
50+
if (qspec.quant_min is not None or qspec.quant_max is not None)
51+
else ""
52+
)
53+
return f"QuantizationSpec(dtype={_fmt(qspec.dtype)}{q_range_fmt})"
54+
elif qspec is None:
55+
return "None"
56+
else:
57+
return qspec.__class__.__name__
7258

7359

7460
class QuantizerInfo(NamedTuple):
@@ -154,15 +140,15 @@ def report_accept(self, pattern: List[Node]) -> None:
154140
f"Node {node.name} was reported as annotated but annotation metadata is missing."
155141
)
156142
qspec_input_map_lines = [
157-
f"{node.name}: {_qspec_repr(qspec)}"
143+
f"{node.name}: {qspec_repr(qspec)}"
158144
for node, qspec in annotation.input_qspec_map.items()
159145
]
160146

161147
node_reports.append(
162148
NodeQSpecReport(
163149
node.name,
164150
qspec_input_map_lines,
165-
_qspec_repr(annotation.output_qspec),
151+
qspec_repr(annotation.output_qspec),
166152
)
167153
)
168154

backends/cortex_m/test/misc/test_quantizer_reporter.py

Lines changed: 94 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,36 @@
66
import logging
77

88
import torch
9-
from executorch.backends.cortex_m.quantizer.quantization_configs import (
10-
INT8_ACTIVATION_PER_CHANNEL_QSPEC,
11-
INT8_WEIGHT_PER_TENSOR_QSPEC,
12-
)
139
from executorch.backends.cortex_m.quantizer.quantizer import mark_node_as_annotated
1410
from executorch.backends.cortex_m.quantizer_reporter import (
1511
logger as quantizer_logger,
12+
qspec_repr,
1613
QuantizerInfo,
1714
QuantizerReport,
1815
QuantizerReporter,
1916
QuantizerReporterUser,
2017
)
2118
from torch.export import export
19+
from torchao.quantization.pt2e import MinMaxObserver, PerChannelMinMaxObserver
20+
from torchao.quantization.pt2e.quantizer import (
21+
DerivedQuantizationSpec,
22+
QuantizationSpec,
23+
SharedQuantizationSpec,
24+
)
25+
26+
INT8_WEIGHT_PER_TENSOR_QSPEC = QuantizationSpec(
27+
dtype=torch.int8,
28+
observer_or_fake_quant_ctr=MinMaxObserver,
29+
qscheme=torch.per_tensor_symmetric,
30+
quant_min=-127,
31+
quant_max=127,
32+
)
33+
INT8_ACTIVATION_PER_CHANNEL_QSPEC = QuantizationSpec(
34+
dtype=torch.int8,
35+
observer_or_fake_quant_ctr=PerChannelMinMaxObserver,
36+
qscheme=torch.per_channel_affine,
37+
ch_axis=0,
38+
)
2239

2340

2441
class _TwoOpModule(torch.nn.Module):
@@ -43,6 +60,74 @@ def get_quantizer_info(self) -> QuantizerInfo:
4360
)
4461

4562

63+
def test_qspec_repr_quantization_spec_with_range():
64+
qspec = QuantizationSpec(
65+
torch.int8,
66+
MinMaxObserver,
67+
quant_min=-42,
68+
quant_max=123,
69+
)
70+
assert qspec_repr(qspec) == "QuantizationSpec(dtype=INT8, range=(-42,123))"
71+
72+
73+
def test_qspec_repr_quantization_spec_without_range():
74+
qspec = QuantizationSpec(
75+
torch.int16,
76+
MinMaxObserver,
77+
)
78+
assert qspec_repr(qspec) == "QuantizationSpec(dtype=INT16)"
79+
80+
81+
def test_qspec_repr_quantization_spec_partial_range():
82+
qspec = QuantizationSpec(
83+
torch.int16,
84+
MinMaxObserver,
85+
quant_min=-100,
86+
)
87+
assert qspec_repr(qspec) == "QuantizationSpec(dtype=INT16, range=(-100,None))"
88+
89+
90+
def test_qspec_repr_shared_quantization_spec():
91+
graph_module = _export_two_op_graph_module()
92+
add_node = next(
93+
node
94+
for node in graph_module.graph.nodes
95+
if node.target == torch.ops.aten.add.Tensor
96+
)
97+
qspec = SharedQuantizationSpec(add_node)
98+
99+
assert qspec_repr(qspec) == f"SharedQuantizationSpec(edge_or_node={add_node})"
100+
101+
102+
def test_qspec_repr_derived_quantization_spec():
103+
graph_module = _export_two_op_graph_module()
104+
x_node = next(node for node in graph_module.graph.nodes if node.name == "x")
105+
y_node = next(node for node in graph_module.graph.nodes if node.name == "y")
106+
add_node = next(
107+
node
108+
for node in graph_module.graph.nodes
109+
if node.target == torch.ops.aten.add.Tensor
110+
)
111+
derived_from = [(x_node, add_node), (y_node, add_node)]
112+
qspec = DerivedQuantizationSpec(
113+
derived_from=derived_from,
114+
derive_qparams_fn=lambda _: (
115+
torch.tensor([1.0]),
116+
torch.tensor([0], dtype=torch.int32),
117+
),
118+
dtype=torch.int32,
119+
)
120+
121+
assert (
122+
qspec_repr(qspec)
123+
== f"DerivedQuantizationSpec(derived_from={derived_from}, dtype={qspec.dtype})"
124+
)
125+
126+
127+
def test_qspec_repr_none():
128+
assert qspec_repr(None) == "None"
129+
130+
46131
def test_warning_log_level(caplog):
47132
graph_module = _export_two_op_graph_module()
48133

@@ -128,11 +213,11 @@ def test_debug_log_level(caplog):
128213
Rejected due to previous annotation: 0
129214
Rejected nodes: 0
130215
131-
NODE NAME INPUT QSPEC MAP OUTPUT QSPEC MAP
132-
-- ----------- ------------------------------- ---------------------------------
133-
╒ add x: INT8_WEIGHT_PER_TENSOR_QSPEC NO_QSPEC
134-
| y: NO_QSPEC
135-
╘ relu INT8_ACTIVATION_PER_CHANNEL_QSPEC
216+
NODE NAME INPUT QSPEC MAP OUTPUT QSPEC MAP
217+
-- ----------- ------------------------------------------------- ----------------------------
218+
╒ add x: QuantizationSpec(dtype=INT8, range=(-127,127)) None
219+
| y: None
220+
╘ relu QuantizationSpec(dtype=INT8)
136221
----------------------------------------------------------------------------------------------------
137222
DummyQuantizer using dummy nodes
138223
Annotating with dummy.config

0 commit comments

Comments (0)