|
1 | 1 | --- |
| 2 | +- name: "qwopus3.6-27b-v2-mtp" |
| 3 | + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" |
| 4 | + urls: |
| 5 | + - https://huggingface.co/Jackrong/Qwopus3.6-27B-v2-MTP-GGUF |
| 6 | + description: | |
| 7 | + 🪐 Qwopus3.6-27B-v2-MTP |
| 8 | + MTP Release |
| 9 | + |
| 10 | + Multi-Token Prediction reasoning model fine-tuned from Qwen3.6-27B |
| 11 | + |
| 12 | + 🧬 Trace Inversion & Negentropy |
| 13 | + 🧠 27B Parameters |
| 14 | + ⚡ Speculative Decoding |
| 15 | + 🛠️ Coding / DevOps / Math |
| 16 | + |
| 17 | + 💡 What is Qwopus3.6-27B-v2-MTP? |
| 18 | + 🪐 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster. |
| 19 | + |
| 20 | + ⚡ MTP Decoding: Auxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts. |
| 21 | + 🧩 Structured Reasoning: Inherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories. |
| 22 | + 🧪 GB10 Tested: Validated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks. |
| 23 | + 🚀 Practical Speed: Designed for workflows where strong answers matter, but waiting several extra minutes per task does not. |
| 24 | + |
| 25 | + ... |
| 26 | + license: "apache-2.0" |
| 27 | + tags: |
| 28 | + - llm |
| 29 | + - gguf |
| 30 | + - reasoning |
| 31 | + overrides: |
| 32 | + backend: llama-cpp |
| 33 | + function: |
| 34 | + automatic_tool_parsing_fallback: true |
| 35 | + grammar: |
| 36 | + disable: true |
| 37 | + known_usecases: |
| 38 | + - chat |
| 39 | + options: |
| 40 | + - use_jinja:true |
| 41 | + - spec_type:draft-mtp |
| 42 | + - spec_n_max:6 |
| 43 | + - spec_p_min:0.75 |
| 44 | + parameters: |
| 45 | + model: llama-cpp/models/Qwopus3.6-27B-v2-MTP-GGUF/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf |
| 46 | + template: |
| 47 | + use_tokenizer_template: true |
| 48 | + files: |
| 49 | + - filename: llama-cpp/models/Qwopus3.6-27B-v2-MTP-GGUF/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf |
| 50 | + sha256: 818d68223be4d8518dac0b3b5604dde633cbbcbae1f491d842a3e26711c6606d |
| 51 | + uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-v2-MTP-GGUF/resolve/main/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf |
2 | 52 | - name: "qwen3.6-40b-claude-4.6-opus-deckard-heretic-uncensored-thinking-neo-code-di-imatrix-max" |
3 | 53 | url: "github:mudler/LocalAI/gallery/virtual.yaml@master" |
4 | 54 | urls: |
|
0 commit comments