Skip to content

Commit 6d191fa

Browse files
committed
Wrap iOS18 quantization errors with ExecuTorch-specific hint
When the user lowers a model that was prepared with torchao's `quantize_(...)` (e.g. blockwise int4) but does not pass an iOS18+ `minimum_deployment_target` to the CoreML partitioner, coremltools raises a generic ValueError pointing at coremltools internals. The user has no obvious way to discover that the target is set via `CoreMLBackend.generate_compile_specs` and plumbed through `CoreMLPartitioner(compile_specs=...)`. Catch the ValueError around the two coremltools utilities used by our overridden `dequantize_affine` / `dequantize_codebook` handlers and re-raise it with an ExecuTorch-flavored hint that shows the exact partitioner call to make. Fixes #13122.
1 parent 31f0692 commit 6d191fa

2 files changed

Lines changed: 61 additions & 11 deletions

File tree

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,20 @@
1212
import torch as _torch
1313
from coremltools import _logger
1414
from coremltools.converters.mil.frontend import _utils
15+
16+
_IOS18_QUANT_HINT = (
17+
"ExecuTorch hint: pass `compile_specs=CoreMLBackend.generate_compile_specs("
18+
"minimum_deployment_target=ct.target.iOS18)` (or higher) to "
19+
"`CoreMLPartitioner` when lowering models that use `quantize_(...)`."
20+
)
21+
22+
23+
def _raise_with_executorch_hint(err: Exception) -> "BaseException":
24+
"""Re-raise a coremltools quantization error with ExecuTorch-specific guidance."""
25+
msg = str(err)
26+
if "iOS18" in msg or "iOS 18" in msg:
27+
raise ValueError(f"{msg}\n{_IOS18_QUANT_HINT}") from err
28+
raise err
1529
from coremltools.converters.mil.frontend.torch.ops import (
1630
_get_inputs,
1731
_get_kwinputs,
@@ -159,12 +173,15 @@ def dequantize_affine(context, node):
159173
f"Unsupported quantization range: {quant_min} to {quant_max}. CoreML only supports 4-bit and 8-bit quantization."
160174
)
161175

162-
output = _utils._construct_constexpr_dequant_op(
163-
int_data.astype(quantized_np_dtype),
164-
zero_point,
165-
scale,
166-
name=node.name,
167-
)
176+
try:
177+
output = _utils._construct_constexpr_dequant_op(
178+
int_data.astype(quantized_np_dtype),
179+
zero_point,
180+
scale,
181+
name=node.name,
182+
)
183+
except ValueError as e:
184+
_raise_with_executorch_hint(e)
168185
context.add(output, node.name)
169186

170187

@@ -211,9 +228,12 @@ def dequantize_codebook(context, node):
211228
f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
212229
)
213230

214-
output = _utils._construct_constexpr_lut_op(
215-
codes.astype(np.int8),
216-
codebook,
217-
name=node.name,
218-
)
231+
try:
232+
output = _utils._construct_constexpr_lut_op(
233+
codes.astype(np.int8),
234+
codebook,
235+
name=node.name,
236+
)
237+
except ValueError as e:
238+
_raise_with_executorch_hint(e)
219239
context.add(output, node.name)

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,36 @@ def forward(self, x):
318318
self._compare_outputs(et_prog, model, example_inputs)
319319

320320

321+
def test_dequantize_affine_below_ios18_raises_with_hint(self):
    """
    Regression test for https://github.com/pytorch/executorch/issues/13122.

    Lowering a `quantize_(...)`-prepared model (blockwise / int4) needs an
    iOS18+ deployment target. The raw coremltools ValueError never mentions
    the ExecuTorch-side fix, so the wrapped error must name the partitioner
    knob to turn.
    """
    linear = torch.nn.Linear(64, 64)
    config = IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32))
    quantize_(linear, config)
    exported = torch.export.export(linear.eval(), (torch.randn(1, 64),), strict=True)
    partitioner = self._coreml_partitioner(minimum_deployment_target=ct.target.iOS17)
    with self.assertRaises(ValueError) as ctx:
        executorch.exir.to_edge_transform_and_lower(exported, partitioner=[partitioner])
    message = str(ctx.exception)
    # The hint must name both the requirement and the ExecuTorch-side fix.
    for expected in ("iOS18", "CoreMLPartitioner", "minimum_deployment_target"):
        self.assertIn(expected, message)
350+
321351
if __name__ == "__main__":
322352
test_runner = TestTorchOps()
323353
test_runner.test_dequantize_affine_b4w_embedding()

0 commit comments

Comments
 (0)