Skip to content

Commit 3b5e0b0

Browse files
committed
feat: add qwopus 3.6 35b a3b v1 configuration
1 parent 6d1509f commit 3b5e0b0

1 file changed

Lines changed: 63 additions & 0 deletions

File tree

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# ==============================================================================
2+
# Model: Qwopus3.6-35B-A3B-v1 (APEX-I-Balanced)
3+
# Optimized for: 16GB RTX 4070 Ti Super | AMD 7950X3D
4+
# Architecture: Hybrid MoE (256 experts, 8 active) + Gated DeltaNet
5+
# ==============================================================================
6+
7+
SERVICE_NAME="llama-qwopus.service"
8+
MODEL_PATH="/home/siva/models/Qwen3.6-35B-A3B/mudler/Qwopus3.6-35B-A3B-v1-APEX-I-Balanced.gguf"
9+
MMPRJ_PATH=""
10+
MODEL_ALIAS="qwopus-3.6-35b-v1"
11+
12+
# --- COMPUTE (Mixed Affinity per GEMINI.md) ---
13+
# Cross-CCD string for maximum single-model burst performance
14+
CPU_AFFINITY="0-7,24-31"
15+
THREADS=16
16+
THREADS_BATCH=16
17+
N_GPU_LAYERS=999
18+
# N_CPU_MOE=36: Final calibration for stable dual-model execution.
19+
N_CPU_MOE=36
20+
BATCH_SIZE=2048
21+
UBATCH_SIZE=1024
22+
MLOCK=true
23+
FLASH_ATTN=on
24+
NUMA=isolate
25+
MMPRJ_OFFLOAD=false
26+
LOG_DISABLE=false
27+
28+
# --- MEMORY (Hybrid Precision Mandate) ---
29+
# Q8_0 Keys (precision for DeltaNet) and Q4_0 Values (VRAM savings)
30+
CACHE_TYPE_K="q8_0"
31+
CACHE_TYPE_V="q4_0"
32+
CTX_SIZE=262144 # 256k context limit
33+
PARALLEL=2          # Two concurrent slots; set to 1 for focused single-stream reasoning
34+
PORT=8082
35+
36+
# --- INFRASTRUCTURE ---
37+
JINJA=true
38+
JINJA_KWARGS='{"preserve_thinking": true}'
39+
40+
# --- SAMPLING (APEX-I Logic/Coder Chain - Optimized per Research) ---
41+
# Temperature: 0.75 prevents "Expert Collapse" by ensuring router diversity.
42+
#TEMP=0.75
43+
# Min-P: 0.10 is the recommended production threshold for PRISM-Balanced.
44+
#MIN_P=0.10
45+
#TOP_P=0.95
46+
#TOP_K=20
47+
#REPEAT_PENALTY=1.05
48+
#REPEAT_LAST_N=256
49+
# --- REFINED DRY SAMPLER (Coding/MoE Focus) ---
50+
#DRY_MULTIPLIER=0.8
51+
#DRY_BASE=1.75
52+
#DRY_ALLOWED_LENGTH=2
53+
#DRY_PENALTY_LAST_N=4096
54+
#SAMPLERS="dry;top_k;min_p"
55+
56+
# --- REASONING ---
57+
REASONING="on"
58+
REASONING_FORMAT="auto"
59+
REASONING_BUDGET=-1
60+
REASONING_BUDGET_MESSAGE=" [Logic Finalized] "
61+
62+
# --- UI/UX STANDARDS ---
63+
EXTRA_ARGS=""

0 commit comments

Comments
 (0)