Refactor MXFP4 repacking in convert_hf_to_gguf.py and add FFN_GATE_UP_EXP to constants.py

am17an · am17an · commit c846c0b46163 · 2026-01-27T16:12:18.000+01:00
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -10055,20 +10055,23 @@ def transform_nibble_layout(self, tensor):
         out = (out_h >> 4) | (out_l << 4)
         return out
 
-    def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
+    def _repack_mxfp4(self, blocks: Tensor, scales: Tensor) -> Tensor:
+        """Repack blocks and scales into MXFP4 format, returns tensor."""
         assert blocks.dtype == torch.uint8
         assert scales.dtype == torch.uint8
         scales = scales.unsqueeze(-1)
         assert len(blocks.shape) == 4
         assert len(scales.shape) == 4
         blocks = self.transform_nibble_layout(blocks)
         new_data = torch.concat((scales, blocks), dim=-1)
-        new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
-        logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
         # flatten last dim
         new_data = new_data.view(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
-        new_data = new_data.numpy()
-        self.gguf_writer.add_tensor(new_name, new_data, raw_dtype=gguf.GGMLQuantizationType.MXFP4)
+        return new_data
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        if any(x in new_name for x in ("ffn_gate_exps", "ffn_up_exps", "ffn_down_exps", "ffn_gate_up_exps")):
+            return gguf.GGMLQuantizationType.MXFP4
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         blocks0: Tensor = torch.zeros(1)
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
@@ -2483,6 +2483,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_UP_EXP,
         MODEL_TENSOR.FFN_GATE_EXP,
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,