-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yaml
More file actions
73 lines (60 loc) · 1.73 KB
/
docker-compose.yaml
File metadata and controls
73 lines (60 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Two-service stack: a llama.cpp HTTP server (`llama`) and an opencode web UI
# (`opencode`), each with a named volume for its cache.
services:
  # llama.cpp server; downloads the model named by `-hf` and listens on 8080.
  llama:
    image: ghcr.io/ggml-org/llama.cpp:server-vulkan
    # rocm is better, but the image is 4GB in size
    # image: ghcr.io/ggml-org/llama.cpp:server-rocm
    container_name: llama
    # restart: always
    devices:
      - /dev/dri
      # /dev/kfd is needed for rocm, not for vulkan. Uncomment it together
      # with the rocm image; a device path that does not exist on the host
      # prevents the container from being created.
      # - /dev/kfd
    # needed for rocm only; uncomment together with the rocm image
    # ipc: host
    # needed for rocm only; uncomment together with the rocm image
    # security_opt:
    #   - seccomp=unconfined
    volumes:
      # keeps the container's /root/.cache in a named volume across restarts
      - llama-models-cache:/root/.cache
    ports:
      # quoted, as recommended for HOST:CONTAINER mappings
      - "8080:8080"
    environment:
      # needed for rocm only
      ROCBLAS_USE_HIPBLASLT: "0"
    # some model ideas that fit on strix halo 128GB
    # -hf unsloth/Qwen3-Coder-Next-GGUF:Q8_0
    # -hf unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q5_K_XL
    # -hf unsloth/Qwen3.5-27B-GGUF:UD-Q8_K_XL
    # The 35B model should fit into a 16GB vram gpu
    # see https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md
    # for the llama-server flags you can use to tune these settings
    # `>-` folds the lines below into one space-separated argument string
    command: >-
      -hf unsloth/Qwen3.5-35B-A3B-GGUF:Q3_K_S
      --alias Qwen
      -fa on
      --host 0.0.0.0
      --port 8080
      --ctx-size 262144
      --temp 0.6
      --top-p 0.95
      --top-k 20
      --min-p 0.00

  # opencode, built from the Dockerfile in this directory, run as a web server.
  opencode:
    build: .
    # restart: always
    container_name: opencode
    volumes:
      # mount the host opencode config from wherever it lives (read-only)
      - ./opencode.json:/home/user/.config/opencode/opencode.json:z,ro
      - opencode-session-cache:/home/user/.local/share/opencode
      # TODO: mount exactly the folder(s) you want to work on. Mind 'Z' for SELinux
      - /home/youruser/repos:/workspace:Z
    ports:
      # quoted, as recommended for HOST:CONTAINER mappings
      - "4096:4096"
    # start in webserver mode
    command: >-
      web
      --hostname 0.0.0.0
      --port 4096

# Named volumes with default settings.
volumes:
  llama-models-cache: {}
  opencode-session-cache: {}