You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
help="(Experimental) Export multimodal projector (mmproj) for vision models. This will only work on some vision models. A prefix 'mmproj-' will be added to the output file name.",
14111
14145
)
14146
+
parser.add_argument(
14147
+
"--mtp", action="store_true",
14148
+
help="(Experimental) Export only the multi-token prediction (MTP) head as a separate GGUF, suitable for use as a speculative draft. Output file name will get a '-MTP' suffix.",
14149
+
)
14150
+
parser.add_argument(
14151
+
"--no-mtp", action="store_true",
14152
+
help="(Experimental) Exclude the multi-token prediction (MTP) head from the converted GGUF. Pair with --mtp on a second run to publish trunk and MTP as two files. Note: the split form duplicates embeddings, so the bundled default is more space-efficient overall.",
14153
+
)
14112
14154
parser.add_argument(
14113
14155
"--mistral-format", action="store_true",
14114
14156
help="Whether the model is stored following the Mistral format.",
@@ -14268,6 +14310,20 @@ def main() -> None:
14268
14310
else:
14269
14311
model_class = MistralModel
14270
14312
14313
+
if args.mtp and args.no_mtp:
14314
+
logger.error("--mtp and --no-mtp are mutually exclusive")
14315
+
sys.exit(1)
14316
+
14317
+
if (args.mtp or args.no_mtp) and not issubclass(model_class, _Qwen35MtpMixin):
14318
+
logger.error("--mtp / --no-mtp are only supported for Qwen3.5/3.6 text variants today")
14319
+
sys.exit(1)
14320
+
14321
+
# set on the class so __init__ / filter_tensors see the correct mode
0 commit comments