backends/apple/coreml/compiler/torch_ops.py (31 additions, 11 deletions)
@@ -12,6 +12,21 @@
 import torch as _torch
 from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
+from typing import NoReturn
+
+_IOS18_QUANT_HINT = (
+    "ExecuTorch hint: pass `compile_specs=CoreMLBackend.generate_compile_specs("
+    "minimum_deployment_target=ct.target.iOS18)` (or higher) to "
+    "`CoreMLPartitioner` when lowering models that use `quantize_(...)`."
+)
+
+
+def _raise_with_executorch_hint(err: Exception) -> NoReturn:
+    """Re-raise a coremltools quantization error with ExecuTorch-specific guidance."""
+    msg = str(err)
+    if "iOS18" in msg or "iOS 18" in msg:
+        raise ValueError(f"{msg}\n{_IOS18_QUANT_HINT}") from err
+    raise err
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
     _get_kwinputs,
@@ -159,12 +173,15 @@ def dequantize_affine(context, node):
             f"Unsupported quantization range: {quant_min} to {quant_max}. CoreML only supports 4-bit and 8-bit quantization."
         )

-    output = _utils._construct_constexpr_dequant_op(
-        int_data.astype(quantized_np_dtype),
-        zero_point,
-        scale,
-        name=node.name,
-    )
+    try:
+        output = _utils._construct_constexpr_dequant_op(
+            int_data.astype(quantized_np_dtype),
+            zero_point,
+            scale,
+            name=node.name,
+        )
+    except ValueError as e:
+        _raise_with_executorch_hint(e)
     context.add(output, node.name)
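The helper adds the hint only when the upstream message mentions iOS18 and re-raises everything else untouched. A small illustration of both paths, reusing the helper defined above (the error strings are invented for the example):

```python
def _demo_hint_paths() -> None:
    # Hypothetical error texts; only the first mentions iOS18.
    for text in ("this op requires iOS18 or later", "unrelated failure"):
        try:
            _raise_with_executorch_hint(ValueError(text))
        except ValueError as e:
            # First text: the re-raised error carries the partitioner hint.
            # Second text: the original error passes through unchanged.
            print(f"{text!r} -> hint added: {'CoreMLPartitioner' in str(e)}")
```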


@@ -211,9 +228,12 @@ def dequantize_codebook(context, node):
             f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
         )

-    output = _utils._construct_constexpr_lut_op(
-        codes.astype(np.int8),
-        codebook,
-        name=node.name,
-    )
+    try:
+        output = _utils._construct_constexpr_lut_op(
+            codes.astype(np.int8),
+            codebook,
+            name=node.name,
+        )
+    except ValueError as e:
+        _raise_with_executorch_hint(e)
     context.add(output, node.name)
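For reference, a minimal sketch of the configuration the hint asks for, assuming the usual ExecuTorch Core ML import paths; only the partitioner setup is shown, and the rest of the lowering flow is unchanged:

```python
import coremltools as ct

from executorch.backends.apple.coreml.compiler import CoreMLBackend
from executorch.backends.apple.coreml.partition import CoreMLPartitioner

# Target iOS18 so the blockwise / int4 constexpr ops emitted for
# `quantize_(...)` weights are available to the Core ML compiler.
compile_specs = CoreMLBackend.generate_compile_specs(
    minimum_deployment_target=ct.target.iOS18
)
partitioner = CoreMLPartitioner(compile_specs=compile_specs)
```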
backends/apple/coreml/test/test_torch_ops.py (30 additions, 0 deletions)

@@ -318,6 +318,36 @@ def forward(self, x):
         self._compare_outputs(et_prog, model, example_inputs)


+    def test_dequantize_affine_below_ios18_raises_with_hint(self):
+        """
+        Regression test for https://github.com/pytorch/executorch/issues/13122.
+
+        `quantize_(...)` with blockwise / int4 configurations requires iOS18.
+        coremltools raises a ValueError that does not mention how to fix the
+        deployment target on the ExecuTorch side; we wrap it to add the
+        partitioner-level guidance.
+        """
+        model = torch.nn.Linear(64, 64)
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
+        )
+        ep = torch.export.export(model.eval(), (torch.randn(1, 64),), strict=True)
+        with self.assertRaises(ValueError) as cm:
+            executorch.exir.to_edge_transform_and_lower(
+                ep,
+                partitioner=[
+                    self._coreml_partitioner(
+                        minimum_deployment_target=ct.target.iOS17
+                    )
+                ],
+            )
+        msg = str(cm.exception)
+        self.assertIn("iOS18", msg)
+        self.assertIn("CoreMLPartitioner", msg)
+        self.assertIn("minimum_deployment_target", msg)


 if __name__ == "__main__":
     test_runner = TestTorchOps()
     test_runner.test_dequantize_affine_b4w_embedding()