@@ -5518,13 +5518,70 @@ def set_gguf_parameters(self):
55185518 self.gguf_writer.add_rope_dimension_sections(self._QWEN35_DEFAULT_MROPE_SECTION)
55195519
55205520
class _Qwen35MtpMixin:
    """Shared MTP (multi-token prediction) wiring for Qwen3.5/3.6 text variants.

    The HF config carries the MTP depth under ``mtp_num_hidden_layers`` and
    the MTP tensors under ``mtp.*``.  This mixin extends ``block_count`` to
    include the MTP blocks, emits the nextn-predict-layers metadata key, and
    remaps ``mtp.*`` tensor names onto the standard layer-indexed nextn
    naming so the existing tensor_map resolves them unchanged.
    """

    # Annotations for attributes supplied by the concrete subclasses in the
    # MRO, so the type checker accepts the accesses below.
    hparams: dict[str, Any]
    model_arch: gguf.MODEL_ARCH
    gguf_writer: gguf.GGUFWriter
    block_count: int
    tensor_map: gguf.TensorNameMap

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Re-derive block_count so the tensor map also covers the MTP slots
        # appended after the regular transformer layers.
        n_mtp = self.hparams.get("mtp_num_hidden_layers", 0)
        self.block_count = self.hparams["num_hidden_layers"] + n_mtp
        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)

    def set_gguf_parameters(self):
        super().set_gguf_parameters()  # ty: ignore[unresolved-attribute]
        n_mtp = self.hparams.get("mtp_num_hidden_layers", 0)
        if n_mtp > 0:
            self.gguf_writer.add_nextn_predict_layers(n_mtp)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # Multimodal Qwen3.5/3.6 wrap the text model under `model.language_model.*`;
        # strip the wrapper so the names match the pure-text layout.
        if name.startswith("model.language_model."):
            name = "model." + name[len("model.language_model."):]
        elif name.startswith("language_model."):
            name = name.removeprefix("language_model.")

        # Remap MTP block tensors to llama.cpp's layer-indexed nextn naming.
        # HF layout:
        #   mtp.layers.0.*  -> transformer block at MTP slot 0
        #   mtp.fc / mtp.pre_fc_norm_embedding / mtp.pre_fc_norm_hidden / mtp.norm
        if name.startswith("mtp."):
            n_layer = self.hparams["num_hidden_layers"]
            if "layers." not in name:
                # Shared MTP tensors: replicate into every nextn slot under
                # the per-layer names the tensor map already knows.
                remapper = {
                    "mtp.fc": "model.layers.{bid}.eh_proj",
                    "mtp.pre_fc_norm_embedding": "model.layers.{bid}.enorm",
                    "mtp.pre_fc_norm_hidden": "model.layers.{bid}.hnorm",
                    "mtp.norm": "model.layers.{bid}.shared_head.norm",
                }
                path = Path(name)
                template = remapper[path.stem] + path.suffix
                for slot in range(n_layer, self.block_count):
                    yield from super().modify_tensors(data_torch, template.format(bid=slot), slot)  # ty: ignore[unresolved-attribute]
                return
            # Per-layer MTP block: shift its index past the regular layers,
            # then fall through to the normal handling below.
            assert bid is not None
            name = name.replace(f"mtp.layers.{bid}", f"model.layers.{bid + n_layer}")

        yield from super().modify_tensors(data_torch, name, bid)  # ty: ignore[unresolved-attribute]
5577+
@ModelBase.register("Qwen3_5ForConditionalGeneration", "Qwen3_5ForCausalLM")
class Qwen3_5TextModel(_Qwen35MtpMixin, _Qwen35MRopeMixin, _LinearAttentionVReorderBase):
    # Dense Qwen3.5 text model.  All conversion behaviour comes from the
    # mixins (MTP wiring, M-RoPE sections, linear-attention V reorder);
    # this class only pins the GGUF architecture id.
    model_arch = gguf.MODEL_ARCH.QWEN35
55255582
@ModelBase.register("Qwen3_5MoeForConditionalGeneration", "Qwen3_5MoeForCausalLM")
class Qwen3_5MoeTextModel(_Qwen35MtpMixin, _Qwen35MRopeMixin, _LinearAttentionVReorderBase):
    # MoE Qwen3.5 text model.  Identical mixin stack to the dense variant;
    # only the GGUF architecture id differs.
    model_arch = gguf.MODEL_ARCH.QWEN35MOE
55295586
55305587
0 commit comments