|
1 | 1 | --- |
| 2 | +- name: "qwen3.5-9b-deepseek-v4-flash" |
| 3 | + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" |
| 4 | + urls: |
| 5 | + - https://huggingface.co/Jackrong/Qwen3.5-9B-DeepSeek-V4-Flash-GGUF |
| 6 | + description: | |
| 7 | + # Qwen3.5-9B |
| 8 | + |
| 9 | + [Qwen Chat](https://chat.qwen.ai)
| 10 | + |
| 11 | + > [!NOTE]
| 12 | + > This repository contains model weights and configuration files for the post-trained model in the Hugging Face Transformers format. |
| 13 | + > |
| 14 | + > These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, KTransformers, etc. |
| 15 | + |
| 16 | + Over recent months, we have intensified our focus on developing foundation models that deliver exceptional utility and performance. Qwen3.5 represents a significant leap forward, integrating breakthroughs in multimodal learning, architectural efficiency, reinforcement learning scale, and global accessibility to empower developers and enterprises with unprecedented capability and efficiency. |
| 17 | + |
| 18 | + ## Qwen3.5 Highlights |
| 19 | + |
| 20 | + Qwen3.5 features the following enhancements:
| 21 | + |
| 22 | + - **Unified Vision-Language Foundation**: Early fusion training on multimodal tokens achieves cross-generational parity with Qwen3 and outperforms Qwen3-VL models across reasoning, coding, agents, and visual understanding benchmarks. |
| 23 | + |
| 24 | + - **Efficient Hybrid Architecture**: Gated Delta Networks combined with sparse Mixture-of-Experts deliver high-throughput inference with minimal latency and cost overhead. |
| 25 | + |
| 26 | + ... |
| 27 | + license: "apache-2.0" |
| 28 | + tags: |
| 29 | + - llm |
| 30 | + - gguf |
| 31 | + - deepseek |
| 32 | + - reasoning |
| 33 | + icon: https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen3.5/Figures/qwen3.5_small_size_score.png |
| 34 | + overrides: |
| 35 | + backend: llama-cpp |
| 36 | + function: |
| 37 | + automatic_tool_parsing_fallback: true |
| 38 | + grammar: |
| 39 | + disable: true |
| 40 | + known_usecases: |
| 41 | + - chat |
| 42 | + mmproj: llama-cpp/mmproj/Qwen3.5-9B-DeepSeek-V4-Flash-GGUF/mmproj.gguf |
| 43 | + options: |
| 44 | + - use_jinja:true |
| 45 | + parameters: |
| 46 | + min_p: 0 |
| 47 | + model: llama-cpp/models/Qwen3.5-9B-DeepSeek-V4-Flash-GGUF/Qwen3.5-9B-DeepSeek-V4-Flash-Q4_K_M.gguf |
| 48 | + presence_penalty: 1.5 |
| 49 | + repeat_penalty: 1 |
| 50 | + temperature: 0.7 |
| 51 | + top_k: 20 |
| 52 | + top_p: 0.8 |
| 53 | + template: |
| 54 | + use_tokenizer_template: true |
| 55 | + files: |
| 56 | + - filename: llama-cpp/models/Qwen3.5-9B-DeepSeek-V4-Flash-GGUF/Qwen3.5-9B-DeepSeek-V4-Flash-Q4_K_M.gguf |
| 57 | + sha256: 9be227448d319e6a7acca8056b71bf7d9a2c6b2811986e6658a9dedc208d0ada |
| 58 | + uri: https://huggingface.co/Jackrong/Qwen3.5-9B-DeepSeek-V4-Flash-GGUF/resolve/main/Qwen3.5-9B-DeepSeek-V4-Flash-Q4_K_M.gguf |
| 59 | + - filename: llama-cpp/mmproj/Qwen3.5-9B-DeepSeek-V4-Flash-GGUF/mmproj.gguf |
| 60 | + sha256: d589acfddbed3ba291e429330360ded8e67b0910dd415aec2fe7c32b0665f859 |
| 61 | + uri: https://huggingface.co/Jackrong/Qwen3.5-9B-DeepSeek-V4-Flash-GGUF/resolve/main/mmproj.gguf |
2 | 62 | - name: "chroma1-hd" |
3 | 63 | url: "github:mudler/LocalAI/gallery/virtual.yaml@master" |
4 | 64 | license: apache-2.0 |
|
0 commit comments