-
Notifications
You must be signed in to change notification settings - Fork 52
Expand file tree
/
Copy pathdocker-compose.cpp.yml
More file actions
83 lines (81 loc) · 2.57 KB
/
Copy pathdocker-compose.cpp.yml
File metadata and controls
83 lines (81 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# MiniCPM-o C++ backend deployment (docker compose)
#
# Topology:
#   gateway             x1  host :${GATEWAY_HOST_PORT:-8006} -> container :8006
#   cpp-worker-backend  x1  one container with:
#     - llama-omni-server backend on 127.0.0.1:22500
#     - Python worker on 0.0.0.0:22400, forwarding to the local backend
#
# Usage:
#   mkdir -p certs data
#   openssl req -x509 -newkey rsa:2048 -nodes -days 365 \
#     -keyout certs/key.pem -out certs/cert.pem -subj "/CN=minicpm-o"
#   GGUF_MODEL_HOST_PATH=/path/to/MiniCPM-o-4_5-gguf \
#   GATEWAY_HOST_PORT=8006 \
#   CPP_GPU_ID=0 \
#     docker compose -f docker-compose.cpp.yml up -d --build
#   docker compose -f docker-compose.cpp.yml logs -f gateway
#   docker compose -f docker-compose.cpp.yml logs -f cpp-worker-backend
services:
  # Worker container: runs the llama-omni-server backend together with the
  # Python worker (see the topology notes at the top of this file).
  cpp-worker-backend:
    image: minicpm-cpp-worker-backend:dev
    build:
      context: .
      dockerfile: docker/Dockerfile.cpp-worker-backend
      args:
        # Both build args fall back to master when not set in the environment.
        LLAMA_OMNI_REFSPEC: ${LLAMA_OMNI_REFSPEC:-master}
        LLAMA_OMNI_REF: ${LLAMA_OMNI_REF:-origin/master}
    environment:
      # NOTE(review): GPU_ID is fixed at "0" while the host GPU is chosen via
      # CPP_GPU_ID in deploy.resources below — presumably because only the
      # reserved device is visible inside the container; confirm.
      GPU_ID: "0"
      BACKEND_PORT: "22500"
      WORKER_PORT: "22400"
      # GGUF_MODEL_FILE selects a file inside the read-only model mount.
      GGUF_MODEL: "/models/MiniCPM-o-4_5-gguf/${GGUF_MODEL_FILE:-MiniCPM-o-4_5-Q4_K_M.gguf}"
      LLAMA_SERVER_EXTRA_ARGS: "${LLAMA_SERVER_EXTRA_ARGS:--c 8192}"
    volumes:
      # ":?" makes compose abort with this message when the variable is
      # unset or empty, instead of mounting an empty path.
      - "${GGUF_MODEL_HOST_PATH:?set GGUF_MODEL_HOST_PATH to the host GGUF model dir}:/models/MiniCPM-o-4_5-gguf:ro"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU, selected by host device id.
            - driver: nvidia
              device_ids: ["${CPP_GPU_ID:-0}"]
              capabilities: [gpu]
    healthcheck:
      # Probes the worker port (WORKER_PORT above) from inside the container.
      # NOTE(review): requires curl to be present in the image — confirm.
      test: ["CMD", "curl", "-sf", "http://127.0.0.1:22400/health"]
      interval: 15s
      timeout: 5s
      retries: 40
      start_period: 120s
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "3"

  # HTTPS gateway: the only service published on the host.
  gateway:
    image: minicpm-gateway:dev
    build:
      context: .
      dockerfile: docker/Dockerfile.gateway
    # Arguments passed to the gateway image's entrypoint.
    command:
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8006"
      - "--https"
      # Certificate and key are read from the read-only /app/certs mount.
      - "--ssl-certfile"
      - "/app/certs/cert.pem"
      - "--ssl-keyfile"
      - "/app/certs/key.pem"
      # Worker address: compose service name plus the worker port.
      - "--workers"
      - "cpp-worker-backend:22400"
    ports:
      - "${GATEWAY_HOST_PORT:-8006}:8006"
    volumes:
      - "${DATA_HOST_PATH:-./data}:/app/data"
      - "${CERTS_HOST_PATH:-./certs}:/app/certs:ro"
    depends_on:
      # Start only after the worker's healthcheck reports healthy.
      cpp-worker-backend:
        condition: service_healthy
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "3"