|
| 1 | +# ============================================================================== |
| 2 | +# Model: Qwopus3.6-35B-A3B-v1 (APEX-I-Balanced) |
| 3 | +# Optimized for: 16GB RTX 4070 Ti Super | AMD 7950X3D |
| 4 | +# Architecture: Hybrid MoE (256 experts, 8 active) + Gated DeltaNet |
| 5 | +# ============================================================================== |
| 6 | + |
| 7 | +SERVICE_NAME="llama-qwopus.service" |
| 8 | +MODEL_PATH="/home/siva/models/Qwen3.6-35B-A3B/mudler/Qwopus3.6-35B-A3B-v1-APEX-I-Balanced.gguf" |
| 9 | +MMPRJ_PATH="" |
| 10 | +MODEL_ALIAS="qwopus-3.6-35b-v1" |
| 11 | + |
| 12 | +# --- COMPUTE (Mixed Affinity per GEMINI.md) --- |
| 13 | +# Cross-CCD string for maximum single-model burst performance |
| 14 | +CPU_AFFINITY="0-7,24-31" |
| 15 | +THREADS=16 |
| 16 | +THREADS_BATCH=16 |
| 17 | +N_GPU_LAYERS=999 |
| 18 | +# N_CPU_MOE=36: Final calibration for stable dual-model execution. |
| 19 | +N_CPU_MOE=36 |
| 20 | +BATCH_SIZE=2048 |
| 21 | +UBATCH_SIZE=1024 |
| 22 | +MLOCK=true |
| 23 | +FLASH_ATTN=on |
| 24 | +NUMA=isolate |
| 25 | +MMPRJ_OFFLOAD=false |
| 26 | +LOG_DISABLE=false |
| 27 | + |
| 28 | +# --- MEMORY (Hybrid Precision Mandate) --- |
| 29 | +# Q8_0 Keys (precision for DeltaNet) and Q4_0 Values (VRAM savings) |
| 30 | +CACHE_TYPE_K="q8_0" |
| 31 | +CACHE_TYPE_V="q4_0" |
| 32 | +CTX_SIZE=262144 # 256k context limit |
| 33 | +PARALLEL=2 # Two parallel server slots (set to 1 for focused single-stream reasoning) |
| 34 | +PORT=8082 |
| 35 | + |
| 36 | +# --- INFRASTRUCTURE --- |
| 37 | +JINJA=true |
| 38 | +JINJA_KWARGS='{"preserve_thinking": true}' |
| 39 | + |
| 40 | +# --- SAMPLING (APEX-I Logic/Coder Chain - Optimized per Research) --- |
| 41 | +# Temperature: 0.75 prevents "Expert Collapse" by ensuring router diversity. |
| 42 | +#TEMP=0.75 |
| 43 | +# Min-P: 0.10 is the recommended production threshold for APEX-I-Balanced. |
| 44 | +#MIN_P=0.10 |
| 45 | +#TOP_P=0.95 |
| 46 | +#TOP_K=20 |
| 47 | +#REPEAT_PENALTY=1.05 |
| 48 | +#REPEAT_LAST_N=256 |
| 49 | +# --- REFINED DRY SAMPLER (Coding/MoE Focus) --- |
| 50 | +#DRY_MULTIPLIER=0.8 |
| 51 | +#DRY_BASE=1.75 |
| 52 | +#DRY_ALLOWED_LENGTH=2 |
| 53 | +#DRY_PENALTY_LAST_N=4096 |
| 54 | +#SAMPLERS="dry;top_k;min_p" |
| 55 | + |
| 56 | +# --- REASONING --- |
| 57 | +REASONING="on" |
| 58 | +REASONING_FORMAT="auto" |
| 59 | +REASONING_BUDGET=-1 |
| 60 | +REASONING_BUDGET_MESSAGE=" [Logic Finalized] " |
| 61 | + |
| 62 | +# --- UI/UX STANDARDS --- |
| 63 | +EXTRA_ARGS="" |
0 commit comments