You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
help="(Experimental) Export multimodal projector (mmproj) for vision models. This will only work on some vision models. A prefix 'mmproj-' will be added to the output file name.",
14036
14070
)
14071
+
parser.add_argument(
14072
+
"--mtp", action="store_true",
14073
+
help="(Experimental) Export only the multi-token prediction (MTP) head as a separate GGUF, suitable for use as a speculative draft. Output file name will get a '-MTP' suffix.",
14074
+
)
14075
+
parser.add_argument(
14076
+
"--no-mtp", action="store_true",
14077
+
help="(Experimental) Exclude the multi-token prediction (MTP) head from the converted GGUF. Pair with --mtp on a second run to publish trunk and MTP as two files. Note: the split form duplicates embeddings, so the bundled default is more space-efficient overall.",
14078
+
)
14037
14079
parser.add_argument(
14038
14080
"--mistral-format", action="store_true",
14039
14081
help="Whether the model is stored following the Mistral format.",
@@ -14193,6 +14235,20 @@ def main() -> None:
14193
14235
else:
14194
14236
model_class = MistralModel
14195
14237
14238
+
if args.mtp and args.no_mtp:
14239
+
logger.error("--mtp and --no-mtp are mutually exclusive")
14240
+
sys.exit(1)
14241
+
14242
+
if (args.mtp or args.no_mtp) and not issubclass(model_class, _Qwen35MtpMixin):
14243
+
logger.error("--mtp / --no-mtp are only supported for Qwen3.5/3.6 text variants today")
14244
+
sys.exit(1)
14245
+
14246
+
# set on the class so __init__ / filter_tensors see the correct mode
0 commit comments