Move clamp to independent quantization in annotator

JakeStevens · facebook-github-bot · commit 703d869712a5 · 2026-03-05T09:32:58.000-08:00
Summary:
The clamp operation was incorrectly placed in `_one_to_one_shared_input_qspec`,
which causes the input and output observers to be shared. This is problematic
because clamp explicitly modifies the value range by enforcing min/max bounds.

When using clamp to prevent undefined behavior (e.g., clamping inputs to rsqrt
to be positive), the pre-clamp and post-clamp ranges can be very different.
With shared observers, the pre-clamp values (which extend below the clamp
bound) dominate the observed min_val, causing incorrect quantization
parameters for the post-clamp tensor.

This fix moves clamp to `_one_to_one`, giving it independent input/output
quantization so each observer properly tracks its respective range.

Differential Revision: D92408418
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
@@ -433,6 +433,10 @@ def _match_pattern(
     torch.ops.aten.acos.default,
     torch.ops.aten.cumsum.default,
     torch.ops.aten.tan.default,
+    # Clamp modifies the value range (enforces min/max bounds), so it needs
+    # independent input/output quantization to properly track the clamped range.
+    torch.ops.aten.clamp.default,
+    torch.ops.aten.clamp.Tensor,
 ]
 
 _one_to_one_shared_input_qspec = [
@@ -480,8 +484,6 @@ def _match_pattern(
     torch.ops.aten.pad.default,
     torch.ops.aten.amax.default,
     torch.ops.aten.amin.default,
-    torch.ops.aten.clamp.default,
-    torch.ops.aten.clamp.Tensor,
     torch.ops.aten.unflatten.int,
     torch.ops.aten.gather.default,
     torch.ops.aten.unfold_copy.default,
diff --git a/backends/arm/test/quantizer/test_clamp_quantization.py b/backends/arm/test/quantizer/test_clamp_quantization.py
@@ -0,0 +1,100 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Verify that clamp uses independent input/output quantization.
+
+Clamp modifies the value range by enforcing min/max bounds, so its output
+observer must be independent from its input observer. When observers are
+shared, the pre-clamp (wider) values dominate the observed range and the
+post-clamp tensor gets incorrect quantization parameters.
+
+This test feeds a wide-range input through a narrow clamp and checks that
+the quantization scale for the clamp output differs from the input scale.
+"""
+
+import torch
+from executorch.backends.arm.quantizer import (
+    get_symmetric_quantization_config,
+    TOSAQuantizer,
+)
+from executorch.backends.arm.tosa import TosaSpecification
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
+
+Q_PER_TENSOR = torch.ops.quantized_decomposed.quantize_per_tensor.default
+DQ_PER_TENSOR = torch.ops.quantized_decomposed.dequantize_per_tensor.default
+
+
+class ClampModel(torch.nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.clamp(x, min=0.0, max=1.0)
+
+
+def test_clamp_has_different_input_output_qparams():
+    """Input and output scales must differ when clamp narrows the range.
+
+    A wide-range input ([-50, 50]) clamped to [0, 1] should produce a much
+    smaller output scale than input scale, because the output observer only
+    sees values in [0, 1] while the input observer sees the full [-50, 50].
+
+    Before the fix (clamp in _one_to_one_shared_input_qspec), both observers
+    were shared and would produce identical scales — the wider input range
+    dominated, wasting output precision.
+    """
+    model = ClampModel()
+    model.eval()
+
+    # Use deterministic wide-range calibration data so the input observer
+    # sees [-50, 50] while the output observer sees only [0, 1].
+    calibration_input = torch.linspace(-50, 50, 200).reshape(1, 200)
+
+    tosa_spec = TosaSpecification.create_from_string("TOSA-1.0+INT")
+    quantizer = TOSAQuantizer(tosa_spec)
+    quantizer.set_global(get_symmetric_quantization_config(is_per_channel=False))
+
+    exported = torch.export.export(model, (calibration_input,))
+    prepared = prepare_pt2e(exported.module(), quantizer)
+    prepared(calibration_input)
+    converted = convert_pt2e(prepared)
+
+    # After conversion the graph has explicit quantize/dequantize nodes.
+    # For clamp with independent qspecs the pattern is:
+    #   dequantize_per_tensor(input_scale) -> clamp -> quantize_per_tensor(output_scale)
+    # With shared qspecs both scales would be identical.
+    clamp_nodes = [
+        n
+        for n in converted.graph.nodes
+        if n.target in (torch.ops.aten.clamp.default, torch.ops.aten.clamp.Tensor)
+    ]
+    assert (
+        len(clamp_nodes) == 1
+    ), f"Expected exactly 1 clamp node, found {len(clamp_nodes)}"
+    clamp_node = clamp_nodes[0]
+
+    # Get the dequantize node feeding clamp's input — its scale is args[1].
+    input_dq = clamp_node.args[0]
+    assert (
+        input_dq.target == DQ_PER_TENSOR
+    ), f"Expected dequantize_per_tensor before clamp, got {input_dq.target}"
+    input_scale = float(input_dq.args[1])
+
+    # Get the quantize node consuming clamp's output — its scale is args[1].
+    clamp_users = list(clamp_node.users)
+    assert (
+        len(clamp_users) == 1
+    ), f"Expected exactly 1 user of clamp, found {len(clamp_users)}"
+    output_q = clamp_users[0]
+    assert (
+        output_q.target == Q_PER_TENSOR
+    ), f"Expected quantize_per_tensor after clamp, got {output_q.target}"
+    output_scale = float(output_q.args[1])
+
+    # With independent quantization the output scale (tracking [0, 1]) is far
+    # below the input scale (tracking [-50, 50]), so a strict `<` check suffices.
+    assert output_scale < input_scale, (
+        f"Clamp output scale ({output_scale}) should be smaller than input "
+        f"scale ({input_scale}) because clamp narrows [−50, 50] → [0, 1]. "
+        "If they are equal, clamp is using shared observers (bug)."
+    )
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
@@ -36,6 +36,7 @@ def define_arm_tests():
     # Quantization
     test_files += [
         "quantizer/test_generic_annotater.py",
+        "quantizer/test_clamp_quantization.py",
     ]
 
     # Misc tests

Original file line number	Diff line number	Diff line change
`@@ -36,6 +36,7 @@ def define_arm_tests():`
`36`	`36`	`# Quantization`
`37`	`37`	`test_files += [`
`38`	`38`	`"quantizer/test_generic_annotater.py",`
	`39`	`+ "quantizer/test_clamp_quantization.py",`
`39`	`40`	`]`
`40`	`41`
`41`	`42`	`# Misc tests`