From d9e91e770cb1fcd2a0ce1f134eda4f866181a722 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Fri, 15 May 2026 09:01:49 +0800 Subject: [PATCH 1/3] support mimo_v2 Signed-off-by: ZX-ModelCloud --- gptqmodel/models/auto.py | 2 + gptqmodel/models/definitions/__init__.py | 1 + gptqmodel/models/definitions/mimo_v2.py | 86 +++++++++++++++++ tests/models/test_mimo_v2.py | 28 ++++++ tests/test_mimo_v2_support.py | 115 +++++++++++++++++++++++ 5 files changed, 232 insertions(+) create mode 100644 gptqmodel/models/definitions/mimo_v2.py create mode 100644 tests/models/test_mimo_v2.py create mode 100644 tests/test_mimo_v2_support.py diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py index f6c590378..748ad69fc 100644 --- a/gptqmodel/models/auto.py +++ b/gptqmodel/models/auto.py @@ -125,6 +125,7 @@ from .definitions.llava_qwen2 import LlavaQwen2QModel # noqa: E402 from .definitions.longcat_flash import LongCatFlashQModel # noqa: E402 from .definitions.mimo import MimoQModel # noqa: E402 +from .definitions.mimo_v2 import MimoV2QModel # noqa: E402 from .definitions.minicpm import MiniCPMGPTQ # noqa: E402 from .definitions.minicpm3 import MiniCpm3QModel # noqa: E402 from .definitions.minicpm_o import MiniCPMOQModel # noqa: E402 @@ -280,6 +281,7 @@ "telechat": TeleChat2QModel, "instella": InstellaQModel, "mimo": MimoQModel, + "mimo_v2": MimoV2QModel, "falcon_h1": FalconH1QModel, "zamba": ZambaQModel, "zamba2": Zamba2QModel, diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py index ebb3896bd..d23a1e62e 100644 --- a/gptqmodel/models/definitions/__init__.py +++ b/gptqmodel/models/definitions/__init__.py @@ -51,6 +51,7 @@ from .minicpmv import MiniCPMVQModel from .minicpmv_4_6 import MiniCPMV4_6QModel from .minimax_m2 import MiniMaxM2GPTQ +from .mimo_v2 import MimoV2QModel from .mixtral import MixtralQModel from .mllama import MLlamaQModel from .mobilellm import MobileLLMQModel diff --git a/gptqmodel/models/definitions/mimo_v2.py b/gptqmodel/models/definitions/mimo_v2.py new file mode 100644 index 000000000..a59d231eb --- /dev/null +++ b/gptqmodel/models/definitions/mimo_v2.py @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: 2026 ModelCloud.ai +# SPDX-License-Identifier: Apache-2.0 + +import json +import os + +from safetensors import safe_open + +from gptqmodel.models.moe_lifecycle import GateUpDownMoELifecycleHooks + +from ..base import BaseQModel + + +class MimoV2QModel(BaseQModel): + # MiMo V2 uses repository-defined configuration/modeling classes. + require_trust_remote_code = True + + dynamic_expert_index = "n_routed_experts" + + pre_lm_head_norm_module = "model.norm" + rotary_embedding = "model.rotary_emb" + + awq_scale_optimize_shape_dependent_modules = ["self_attn.o_proj"] + + moe_lifecycle_hooks = GateUpDownMoELifecycleHooks() + + # MiMo V2 supports both split q/k/v and fused qkv checkpoints, and individual + # layers can be dense MLP or routed MoE according to config.moe_layer_freq. + layer_modules_strict = False + + module_tree = [ + "model", + "layers", + "#", + { + "input_layernorm": ("input_layernorm:!",), + "self_attn": ("qkv_proj:0", "q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), + "post_attention_layernorm": ("post_attention_layernorm:!",), + "mlp:moe:?": { + "": ("gate_proj:0", "up_proj:0", "down_proj:1"), + "gate": ("gate:!",), + "experts": { + "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), + }, + }, + }, + ] + + @staticmethod + def _checkpoint_has_tensor(model_local_path: str, tensor_name: str) -> bool: + if not model_local_path: + return True + + index_path = os.path.join(model_local_path, "model.safetensors.index.json") + if os.path.isfile(index_path): + with open(index_path, encoding="utf-8") as fp: + weight_map = json.load(fp).get("weight_map", {}) + return tensor_name in weight_map + + tensor_file = os.path.join(model_local_path, "model.safetensors") + if os.path.isfile(tensor_file): + with safe_open(tensor_file, framework="pt", device="cpu") as handler: + return tensor_name in handler.keys() + + return True + + @staticmethod + def _drop_visual_ln_q_bias_if_checkpoint_omits_it(model, model_local_path: str) -> None: + visual = getattr(model, "visual", None) + merger = getattr(visual, "merger", None) + ln_q = getattr(merger, "ln_q", None) + if ln_q is None or getattr(ln_q, "bias", None) is None: + return + + bias_name = "visual.merger.ln_q.bias" + if MimoV2QModel._checkpoint_has_tensor(model_local_path, bias_name): + return + + # MiMo V2.5 Base checkpoints omit this default LayerNorm bias; keep + # the shell parameters aligned so offload-backed save does not chase it. + ln_q.register_parameter("bias", None) + + def after_model_load(self, model, load_quantized_model=False): + model = super().after_model_load(model, load_quantized_model=load_quantized_model) + self._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, self.model_local_path) + return model diff --git a/tests/models/test_mimo_v2.py b/tests/models/test_mimo_v2.py new file mode 100644 index 000000000..a521a605f --- /dev/null +++ b/tests/models/test_mimo_v2.py @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium +from gptqmodel.quantization.config import MoEConfig, ExpertsRoutingOverride +from model_test import ModelTest + + +class TestMimo(ModelTest): + # NATIVE_MODEL_ID = "/monster/data/model/MiMo-V2.5-Base-BF16" + NATIVE_MODEL_ID = "./temp/MiMo-V2.5-Base-BF16" + EVAL_TASKS_SLOW = { + "arc_challenge": { + "chat_template": True, + "acc": {"value": 0.2739, "floor_pct": 0.2}, + "acc_norm": {"value": 0.3055, "floor_pct": 0.2}, + }, + } + EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW) + TRUST_REMOTE_CODE = True + USE_FLASH_ATTN = False + EVAL_BATCH_SIZE = 6 + MOE_CONFIG = MoEConfig(routing=ExpertsRoutingOverride(num_experts_per_tok="all")) + MODEL_COMPAT_FAST_LAYER_POSITION = "first" + SAVE_PATH = "./temp/mimo_v2_gptq" + + def test_mimo(self): + self.quantize_and_evaluate() diff --git a/tests/test_mimo_v2_support.py b/tests/test_mimo_v2_support.py new file mode 100644 index 000000000..9d7389924 --- /dev/null +++ b/tests/test_mimo_v2_support.py @@ -0,0 +1,115 @@ +import json +from types import SimpleNamespace + +from torch import nn + +from gptqmodel.models import auto +from gptqmodel.models.definitions.mimo_v2 import MimoV2QModel + + +_LOCAL_MIMO_V2_5_BASE_MODELING_SIGNATURE = { + "architectures": ["MiMoV2ForCausalLM"], + "attention_projection_layout": "fused_qkv", + "hidden_size": 4096, + "intermediate_size": 16384, + "model_type": "mimo_v2", + "moe_intermediate_size": 2048, + "n_routed_experts": 256, + "num_attention_heads": 64, + "num_experts_per_tok": 8, + "num_hidden_layers": 48, + "num_key_value_heads": 4, +} + + +def test_mimo_v2_model_type_selects_definition(monkeypatch): + fake_config = SimpleNamespace(model_type="mimo_v2") + + monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code) + monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config) + + assert auto.check_and_get_model_definition("/monster/data/model/MiMo-V2.5-Base") is MimoV2QModel + + +def test_mimo_v2_5_base_local_modeling_signature_snapshot(): + assert _LOCAL_MIMO_V2_5_BASE_MODELING_SIGNATURE == { + "architectures": ["MiMoV2ForCausalLM"], + "attention_projection_layout": "fused_qkv", + "hidden_size": 4096, + "intermediate_size": 16384, + "model_type": "mimo_v2", + "moe_intermediate_size": 2048, + "n_routed_experts": 256, + "num_attention_heads": 64, + "num_experts_per_tok": 8, + "num_hidden_layers": 48, + "num_key_value_heads": 4, + } + + +def test_mimo_v2_module_tree_expands_fused_attention_dense_mlp_and_moe_paths(): + layer_modules = MimoV2QModel.simple_layer_modules( + model_config=SimpleNamespace(n_routed_experts=4), + quantize_config=SimpleNamespace(dynamic=None), + ) + flat_modules = {name for block in layer_modules for name in block} + + assert MimoV2QModel.require_trust_remote_code is True + assert MimoV2QModel.layer_modules_strict is False + assert MimoV2QModel.pre_lm_head_norm_module == "model.norm" + assert MimoV2QModel.rotary_embedding == "model.rotary_emb" + assert "self_attn.qkv_proj" in flat_modules + assert "self_attn.q_proj" in flat_modules + assert "self_attn.k_proj" in flat_modules + assert "self_attn.v_proj" in flat_modules + assert "self_attn.o_proj" in flat_modules + assert "mlp.gate_proj" in flat_modules + assert "mlp.up_proj" in flat_modules + assert "mlp.down_proj" in flat_modules + assert "mlp.experts.0.gate_proj" in flat_modules + assert "mlp.experts.0.up_proj" in flat_modules + assert "mlp.experts.0.down_proj" in flat_modules + assert "mlp.gate" not in flat_modules + + +def test_mimo_v2_drops_visual_ln_q_bias_when_checkpoint_omits_it(tmp_path): + model = SimpleNamespace( + visual=SimpleNamespace( + merger=SimpleNamespace( + ln_q=nn.LayerNorm(8), + ) + ) + ) + index = { + "metadata": {}, + "weight_map": { + "visual.merger.ln_q.weight": "model.safetensors", + }, + } + (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8") + + MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path)) + + assert model.visual.merger.ln_q.bias is None + + +def test_mimo_v2_keeps_visual_ln_q_bias_when_checkpoint_has_it(tmp_path): + model = SimpleNamespace( + visual=SimpleNamespace( + merger=SimpleNamespace( + ln_q=nn.LayerNorm(8), + ) + ) + ) + index = { + "metadata": {}, + "weight_map": { + "visual.merger.ln_q.weight": "model.safetensors", + "visual.merger.ln_q.bias": "model.safetensors", + }, + } + (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8") + + MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path)) + + assert model.visual.merger.ln_q.bias is not None From e98b987b46748b6f2e8f7179244347be81d5d65b Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Fri, 15 May 2026 15:13:48 +0800 Subject: [PATCH 2/3] fix test_mimo_v2 Signed-off-by: ZX-ModelCloud --- gptqmodel/models/definitions/mimo_v2.py | 68 ++++++++++++++++++++--- tests/models/test_mimo_v2.py | 4 +- tests/test_mimo_v2_support.py | 74 +++++++++++++++++++++---- 3 files changed, 124 insertions(+), 22 deletions(-) diff --git a/gptqmodel/models/definitions/mimo_v2.py b/gptqmodel/models/definitions/mimo_v2.py index a59d231eb..3443dbc3f 100644 --- a/gptqmodel/models/definitions/mimo_v2.py +++ b/gptqmodel/models/definitions/mimo_v2.py @@ -5,10 +5,12 @@ import os from safetensors import safe_open +from torch import nn from gptqmodel.models.moe_lifecycle import GateUpDownMoELifecycleHooks from ..base import BaseQModel +from ...utils.torch import CPU class MimoV2QModel(BaseQModel): @@ -65,22 +67,70 @@ def _checkpoint_has_tensor(model_local_path: str, tensor_name: str) -> bool: return True @staticmethod - def _drop_visual_ln_q_bias_if_checkpoint_omits_it(model, model_local_path: str) -> None: + def _drop_visual_merger_biases_if_checkpoint_omits_them(model, model_local_path: str) -> None: visual = getattr(model, "visual", None) merger = getattr(visual, "merger", None) - ln_q = getattr(merger, "ln_q", None) - if ln_q is None or getattr(ln_q, "bias", None) is None: + if not isinstance(merger, nn.Module): return - bias_name = "visual.merger.ln_q.bias" - if MimoV2QModel._checkpoint_has_tensor(model_local_path, bias_name): + for module_name, module in merger.named_modules(): + if getattr(module, "bias", None) is None: + continue + + prefix = "visual.merger" + if module_name: + prefix = f"{prefix}.{module_name}" + weight_name = f"{prefix}.weight" + bias_name = f"{prefix}.bias" + if MimoV2QModel._checkpoint_has_tensor(model_local_path, bias_name): + continue + if not MimoV2QModel._checkpoint_has_tensor(model_local_path, weight_name): + continue + + # MiMo V2.5 Base visual merger checkpoints include weights but omit + # default biases; align the shell so offload-backed save skips them. + module.register_parameter("bias", None) + + @staticmethod + def _drop_parameter_if_checkpoint_omits_it(model, model_local_path: str, tensor_name: str) -> None: + if MimoV2QModel._checkpoint_has_tensor(model_local_path, tensor_name): + return + + module_path, _, leaf = tensor_name.rpartition(".") + module = model + for part in module_path.split("."): + module = getattr(module, part, None) + if module is None: + return + + if not isinstance(module, nn.Module) or leaf not in module._parameters: return - # MiMo V2.5 Base checkpoints omit this default LayerNorm bias; keep - # the shell parameters aligned so offload-backed save does not chase it. - ln_q.register_parameter("bias", None) + module.register_parameter(leaf, None) + + @staticmethod + def _drop_checkpoint_omitted_audio_tensors(model, model_local_path: str) -> None: + # Remote MiMo marks this input embedding as load-missing-ignored and + # feeds the local transformer via inputs_embeds, so no trained weight exists. + MimoV2QModel._drop_parameter_if_checkpoint_omits_it( + model, + model_local_path, + "audio_encoder.input_local_transformer.embed_tokens.weight", + ) def after_model_load(self, model, load_quantized_model=False): model = super().after_model_load(model, load_quantized_model=load_quantized_model) - self._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, self.model_local_path) + self._drop_visual_merger_biases_if_checkpoint_omits_them(model, self.model_local_path) + self._drop_checkpoint_omitted_audio_tensors(model, self.model_local_path) return model + + def pre_quantize_generate_hook_start(self): + model = self.model.model + rotary_emb_cls = type(model.rotary_emb) + assert "MiMoV2RotaryEmbedding" in rotary_emb_cls.__name__ + config = model.rotary_emb.config + # MiMoV2RotaryEmbedding cannot be correctly reconstructed via `_build_nonpersistent_buffer_template()`. + # Since it takes three arguments, `_build_nonpersistent_buffer_template()` is unable to infer the `is_swa` parameter. + # Therefore, MiMoV2RotaryEmbedding is manually reconstructed here. + model.rotary_emb = rotary_emb_cls(config=config, is_swa=False, device=CPU) + model.swa_rotary_emb = rotary_emb_cls(config=config, is_swa=True, device=CPU) diff --git a/tests/models/test_mimo_v2.py b/tests/models/test_mimo_v2.py index a521a605f..0cf0f02e4 100644 --- a/tests/models/test_mimo_v2.py +++ b/tests/models/test_mimo_v2.py @@ -7,8 +7,7 @@ class TestMimo(ModelTest): - # NATIVE_MODEL_ID = "/monster/data/model/MiMo-V2.5-Base-BF16" - NATIVE_MODEL_ID = "./temp/MiMo-V2.5-Base-BF16" + NATIVE_MODEL_ID = "/monster/data/model/MiMo-V2.5-Base-BF16" EVAL_TASKS_SLOW = { "arc_challenge": { "chat_template": True, @@ -22,7 +21,6 @@ class TestMimo(ModelTest): EVAL_BATCH_SIZE = 6 MOE_CONFIG = MoEConfig(routing=ExpertsRoutingOverride(num_experts_per_tok="all")) MODEL_COMPAT_FAST_LAYER_POSITION = "first" - SAVE_PATH = "./temp/mimo_v2_gptq" def test_mimo(self): self.quantize_and_evaluate() diff --git a/tests/test_mimo_v2_support.py b/tests/test_mimo_v2_support.py index 9d7389924..23886acfe 100644 --- a/tests/test_mimo_v2_support.py +++ b/tests/test_mimo_v2_support.py @@ -22,6 +22,24 @@ } +class _FakeVisualMerger(nn.Module): + def __init__(self): + super().__init__() + self.ln_q = nn.LayerNorm(8) + self.mlp = nn.Sequential( + nn.Linear(8, 8), + nn.GELU(), + nn.Linear(8, 4), + ) + + +class _FakeAudioEncoder(nn.Module): + def __init__(self): + super().__init__() + self.input_local_transformer = nn.Module() + self.input_local_transformer.embed_tokens = nn.Embedding(16, 8) + + def test_mimo_v2_model_type_selects_definition(monkeypatch): fake_config = SimpleNamespace(model_type="mimo_v2") @@ -72,33 +90,33 @@ def test_mimo_v2_module_tree_expands_fused_attention_dense_mlp_and_moe_paths(): assert "mlp.gate" not in flat_modules -def test_mimo_v2_drops_visual_ln_q_bias_when_checkpoint_omits_it(tmp_path): +def test_mimo_v2_drops_visual_merger_biases_when_checkpoint_omits_them(tmp_path): model = SimpleNamespace( visual=SimpleNamespace( - merger=SimpleNamespace( - ln_q=nn.LayerNorm(8), - ) + merger=_FakeVisualMerger() ) ) index = { "metadata": {}, "weight_map": { "visual.merger.ln_q.weight": "model.safetensors", + "visual.merger.mlp.0.weight": "model.safetensors", + "visual.merger.mlp.2.weight": "model.safetensors", }, } (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8") - MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path)) + MimoV2QModel._drop_visual_merger_biases_if_checkpoint_omits_them(model, str(tmp_path)) assert model.visual.merger.ln_q.bias is None + assert model.visual.merger.mlp[0].bias is None + assert model.visual.merger.mlp[2].bias is None -def test_mimo_v2_keeps_visual_ln_q_bias_when_checkpoint_has_it(tmp_path): +def test_mimo_v2_keeps_visual_merger_biases_when_checkpoint_has_them(tmp_path): model = SimpleNamespace( visual=SimpleNamespace( - merger=SimpleNamespace( - ln_q=nn.LayerNorm(8), - ) + merger=_FakeVisualMerger() ) ) index = { @@ -106,10 +124,46 @@ def test_mimo_v2_keeps_visual_ln_q_bias_when_checkpoint_has_it(tmp_path): "weight_map": { "visual.merger.ln_q.weight": "model.safetensors", "visual.merger.ln_q.bias": "model.safetensors", + "visual.merger.mlp.0.weight": "model.safetensors", + "visual.merger.mlp.0.bias": "model.safetensors", + "visual.merger.mlp.2.weight": "model.safetensors", + "visual.merger.mlp.2.bias": "model.safetensors", }, } (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8") - MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path)) + MimoV2QModel._drop_visual_merger_biases_if_checkpoint_omits_them(model, str(tmp_path)) assert model.visual.merger.ln_q.bias is not None + assert model.visual.merger.mlp[0].bias is not None + assert model.visual.merger.mlp[2].bias is not None + + +def test_mimo_v2_drops_audio_input_embedding_when_checkpoint_omits_it(tmp_path): + model = SimpleNamespace(audio_encoder=_FakeAudioEncoder()) + index = { + "metadata": {}, + "weight_map": { + "audio_encoder.input_local_transformer.layers.0.input_layernorm.weight": "model.safetensors", + }, + } + (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8") + + MimoV2QModel._drop_checkpoint_omitted_audio_tensors(model, str(tmp_path)) + + assert model.audio_encoder.input_local_transformer.embed_tokens.weight is None + + +def test_mimo_v2_keeps_audio_input_embedding_when_checkpoint_has_it(tmp_path): + model = SimpleNamespace(audio_encoder=_FakeAudioEncoder()) + index = { + "metadata": {}, + "weight_map": { + "audio_encoder.input_local_transformer.embed_tokens.weight": "model.safetensors", + }, + } + (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8") + + MimoV2QModel._drop_checkpoint_omitted_audio_tensors(model, str(tmp_path)) + + assert model.audio_encoder.input_local_transformer.embed_tokens.weight is not None From 351df1760d2aac5ff919c8e9e48e52349b1195df Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Fri, 15 May 2026 15:20:00 +0800 Subject: [PATCH 3/3] update README.md Signed-off-by: ZX-ModelCloud --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f3d40bb5b..fdc92c9aa 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ ## Latest News +* 05/15/2026 7.1.0-dev `main`: ✨ Added `mimo_v2` model support * 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` model support * 05/07/2026 7.1.0-dev `main`: ✨ Added `GLM-4.5V`, `GLM-4.6V`, `Zamba` and `Zamba2` model support * 04/29/2026 7.1.0-dev `main`: ✨ Added PoolSideAI `Laguna` model support for fused Laguna MoE checkpoints. Added `ERNIE 4.5 VL MoE`, `Ling-2.6-flash` and NVIDIA `Nemotron 3 Nano Omni` model support. @@ -260,7 +261,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode | ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4_6 | ✅ | PanGu-α | ✅ | | XVERSE | ✅ | Brumby | ✅ | Hymba | ✅ | Mistral | ✅ | Qwen 1/2/3/3.5 | ✅ | | MiniMax M2 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2-MoE | ✅ | Marin | ✅ | -| InternVL Chat | ✅ | Laguna | ✅ | Zamba / Zamba2 | ✅ | | | | | +| InternVL Chat | ✅ | Laguna | ✅ | Mimo / Mimo V2 | ✅ | Zamba / Zamba2 | ✅ | | | Prism Bonsai GGUF checkpoints are supported for inference only through GPT-QModel's native GGUF path and internal GGUF runtime. Bonsai checkpoints load through the normal model path or repo argument and do not require the external `gguf` package. Prism model quantization is not included.