-
-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathdocker-compose-local.yml
More file actions
63 lines (63 loc) · 2.31 KB
/
docker-compose-local.yml
File metadata and controls
63 lines (63 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Local-build Compose definition: builds the image from the Dockerfile in
# this directory and runs it as the single `ezlocalai` service.
services:
  ezlocalai:
    build:
      context: .
      dockerfile: Dockerfile
    # Variables defined in .env are passed into the container.
    env_file:
      - .env
    # Each entry has the form NAME=${NAME:-default}: Compose substitutes the
    # value of NAME from the invoking environment and falls back to the text
    # after `:-` when NAME is unset or empty. An empty default (`:-}`) means
    # the variable is passed through as an empty string.
    environment:
      # Endpoint, credentials and model selection
      - EZLOCALAI_URL=${EZLOCALAI_URL:-http://localhost:8091}
      - EZLOCALAI_API_KEY=${EZLOCALAI_API_KEY:-}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-unsloth/Qwen3.5-4B-GGUF}
      - WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
      # Image / video generation (both off by default)
      - IMAGE_ENABLED=${IMAGE_ENABLED:-false}
      - IMG_MODEL=${IMG_MODEL:-}
      - VIDEO_ENABLED=${VIDEO_ENABLED:-false}
      - VIDEO_MODEL=${VIDEO_MODEL:-}
      # Batching, queueing and timeouts
      - LLM_BATCH_SIZE=${LLM_BATCH_SIZE:-auto}
      - MAX_QUEUE_SIZE=${MAX_QUEUE_SIZE:-100}
      - REQUEST_TIMEOUT=${REQUEST_TIMEOUT:-300}
      # Voice: TTS / STT
      - VOICE_SERVER=${VOICE_SERVER:-}
      - VOICE_SERVER_API_KEY=${VOICE_SERVER_API_KEY:-}
      - TTS_ENABLED=${TTS_ENABLED:-true}
      - TTS_N_PARALLEL=${TTS_N_PARALLEL:-1}
      - STT_ENABLED=${STT_ENABLED:-true}
      - STT_N_PARALLEL=${STT_N_PARALLEL:-1}
      # Embeddings
      - EMBEDDING_ENABLED=${EMBEDDING_ENABLED:-true}
      - EMBEDDING_SERVER=${EMBEDDING_SERVER:-}
      - EMBEDDING_SERVER_API_KEY=${EMBEDDING_SERVER_API_KEY:-}
      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-Qwen/Qwen3-Embedding-0.6B-GGUF}
      - EMBEDDING_MODEL_ALIAS=${EMBEDDING_MODEL_ALIAS:-Qwen3-Embedding-0.6B}
      - EMBEDDING_QUANT_TYPE=${EMBEDDING_QUANT_TYPE:-Q8_0}
      - EMBEDDING_CONTEXT_LENGTH=${EMBEDDING_CONTEXT_LENGTH:-32768}
      - EMBEDDING_N_PARALLEL=${EMBEDDING_N_PARALLEL:-1}
      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-512}
      - EMBEDDING_UBATCH_SIZE=${EMBEDDING_UBATCH_SIZE:-512}
      - EMBEDDING_GPU_LAYERS=${EMBEDDING_GPU_LAYERS:-auto}
      - EMBEDDING_VRAM_SAFETY_MARGIN=${EMBEDDING_VRAM_SAFETY_MARGIN:-0.75}
      - EMBEDDING_KV_CACHE_TYPE=${EMBEDDING_KV_CACHE_TYPE:-f16}
      - EMBEDDING_KEEP_LOADED=${EMBEDDING_KEEP_LOADED:-true}
      - LAZY_LOAD_VOICE=${LAZY_LOAD_VOICE:-true}
    restart: unless-stopped
    # Probes /health from inside the container; `-f` makes curl exit non-zero
    # on an HTTP error status, `-s` silences progress output.
    # NOTE(review): assumes curl is installed in the built image - confirm
    # against the Dockerfile.
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://localhost:8091/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during the first 120s do not count towards `retries`
      # (presumably to cover model loading - confirm).
      start_period: 120s
    # Rotate container logs: at most 5 files of 500 MB each.
    logging:
      driver: json-file
      options:
        max-size: "500m"
        max-file: "5"
    # Raise the open-file-descriptor limit inside the container.
    ulimits:
      nofile:
        soft: 65535
        hard: 65535
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    ports:
      - "8091:8091"
    # Bind mounts: host directories next to this file are mapped under /app.
    volumes:
      - ./models:/app/models
      - ./outputs:/app/outputs
      - ./voices:/app/voices
      - ./whispercpp:/app/whispercpp