Update

manuelcandales · manuelcandales · commit 36d45ef40cb4 · 2026-04-20T15:05:25.000-04:00
[ghstack-poisoned]
diff --git a/backends/apple/metal/ops/gather_qmv.py b/backends/apple/metal/ops/gather_qmv.py
@@ -42,9 +42,7 @@ def gather_qmv(
     return y
 
 
-def _quantize_int4_affine(
-    w: Tensor, group_size: int
-) -> tuple[Tensor, Tensor, Tensor]:
+def _quantize_int4_affine(w: Tensor, group_size: int) -> tuple[Tensor, Tensor, Tensor]:
     """Quantize float weights to packed INT4 using MLX affine format.
 
     Args:
@@ -67,8 +65,12 @@ def _quantize_int4_affine(
     scales = ((g_max - g_min) / 15.0).clamp(min=1e-8)
     biases = g_min
     w_int = (
-        (w_groups - biases.unsqueeze(-1)) / scales.unsqueeze(-1)
-    ).round().clamp(0, 15).to(torch.uint8).reshape(*leading, K)
+        ((w_groups - biases.unsqueeze(-1)) / scales.unsqueeze(-1))
+        .round()
+        .clamp(0, 15)
+        .to(torch.uint8)
+        .reshape(*leading, K)
+    )
     packed = w_int[..., 0::2] | (w_int[..., 1::2] << 4)
     return packed, scales, biases
 
diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py
@@ -702,9 +702,7 @@ class GatherQMV(nn.Module):
 
     def __init__(self):
         super().__init__()
-        from executorch.backends.apple.metal.ops.gather_qmv import (
-            _quantize_int4_affine,
-        )
+        from executorch.backends.apple.metal.ops.gather_qmv import _quantize_int4_affine
 
         E, N, K, gs = 4, 64, 128, 32
         torch.manual_seed(0)