Arm backend: Fix int8 TABLE domain for sigmoid LUTs (#18973)

xingguo01 · web-flow · commit 32a6cecd392f · 2026-04-27T08:32:01.000+01:00
- Build 8-bit TOSA TABLE inputs from the canonical int8 code range [-128, 127] instead of using integer linspace. - This avoids the duplicated zero and off-by-one LUT shift seen when qmin=-127 and keeps quantized sigmoid TABLE values aligned with the PT2E q/dq eager reference. - Add pass-level regression tests for the full int8 domain and the reported qmin=-127 sigmoid quantization case. cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils @Sebastian-Larsson @robell --------- Signed-off-by: Xingguo Li <xingguo.li@arm.com>
diff --git a/backends/arm/_passes/insert_table_ops.py b/backends/arm/_passes/insert_table_ops.py
@@ -139,6 +139,17 @@ def register_buffer(self, buffer_name: str, buffer: torch.Tensor) -> None:
         """Add buffer to self.exported_program.state_dict."""
         self.exported_program.state_dict[buffer_name] = buffer
 
+    @staticmethod
+    def _get_8bit_table_domain() -> torch.Tensor:
+        """Return the canonical 8-bit TOSA TABLE input domain."""
+        int8_info = torch.iinfo(torch.int8)
+        # torch.arange excludes the end value, so use max + 1 to include 127.
+        return torch.arange(
+            int8_info.min,
+            int8_info.max + 1,
+            dtype=torch.int8,
+        )
+
     def generate_8bit_table_values(
         self,
         torch_op: Callable[[torch.Tensor], torch.Tensor],
@@ -157,17 +168,10 @@ def f(x: torch.Tensor) -> torch.Tensor:
             x = torch_op(x)
             return out_quantargs.quantize_value(x)
 
-        return (
-            f(
-                torch.linspace(
-                    start=in_quantargs.qmin,
-                    end=in_quantargs.qmax,
-                    steps=256,
-                    dtype=torch.int8,
-                )
-            ).to(dtype=torch.int8),
-            0,
+        effective_codes = self._get_8bit_table_domain().clamp(
+            in_quantargs.qmin, in_quantargs.qmax
         )
+        return (f(effective_codes).to(dtype=torch.int8), 0)
 
     def generate_16_bit_table_values(
         self,
diff --git a/backends/arm/test/models/test_conformer.py b/backends/arm/test/models/test_conformer.py
@@ -36,8 +36,8 @@ class TestConformer:
     # .to_executorch step, i.e. after Arm partitioner.
     aten_ops = ["torch.ops.aten._assert_scalar.default"]
 
-    # TODO(MLETORCH-635): reduce tolerance
-    atol = 0.4
+    # TODO(MLETORCH-636): reduce tolerance
+    atol = 0.45
     rtol = 0.4
 
     dim = 16
diff --git a/backends/arm/test/passes/test_insert_table_ops_pass.py b/backends/arm/test/passes/test_insert_table_ops_pass.py
@@ -1,4 +1,4 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -10,6 +10,7 @@
     FoldAndAnnotateQParamsPass,
 )
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
+from executorch.backends.arm._passes.quant_args import QuantArgs
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
 
@@ -45,3 +46,55 @@ def test_insert_table_ops_tosa_INT(test_data: input_t) -> None:
     pipeline.pop_stage(-1)  # Do not compare output
 
     pipeline.run()
+
+
+def test_generate_8bit_table_domain_covers_full_int8_range() -> None:
+    table_domain = InsertTableOpsPass._get_8bit_table_domain()
+    expected_domain = torch.arange(-128, 128, dtype=torch.int16)
+
+    assert table_domain.dtype == torch.int8
+    assert table_domain.shape == torch.Size((256,))
+    assert torch.equal(table_domain.to(dtype=torch.int16), expected_domain)
+
+
+def test_generate_8bit_table_values_matches_reference_for_qmin_minus_127() -> None:
+    input_qargs = QuantArgs(
+        scale=0.15988604724407196,
+        zp=-17,
+        qmin=-127,
+        qmax=127,
+        dtype=torch.int8,
+    )
+    output_qargs = QuantArgs(
+        scale=0.0039350856095552444,
+        zp=-127,
+        qmin=-127,
+        qmax=127,
+        dtype=torch.int8,
+    )
+
+    insert_table_ops_pass = object.__new__(InsertTableOpsPass)
+    lut_values, lshift = insert_table_ops_pass.generate_8bit_table_values(
+        torch.sigmoid,
+        input_qargs,
+        output_qargs,
+    )
+
+    expected_domain = (
+        torch.arange(-128, 128, dtype=torch.int16)
+        .clamp(input_qargs.qmin, input_qargs.qmax)
+        .to(dtype=torch.int8)
+    )
+    expected_lut_values = output_qargs.quantize_value(
+        torch.sigmoid(input_qargs.dequantize_value(expected_domain))
+    ).to(dtype=torch.int8)
+    zero_input_code = input_qargs.get_zp_per_tensor()
+    zero_input_index = zero_input_code - torch.iinfo(torch.int8).min
+    expected_zero_output = int(
+        output_qargs.quantize_value(torch.tensor([0.5], dtype=torch.float32))[0]
+    )
+
+    assert lshift == 0
+    assert torch.equal(lut_values, expected_lut_values)
+    assert int(lut_values[0]) == int(lut_values[1])
+    assert int(lut_values[zero_input_index]) == expected_zero_output