---
# =============================================================================
# Cake Distributed Cluster — Example Docker Compose
# =============================================================================
#
# Prerequisites:
#   - NVIDIA Container Toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/)
#   - Model data in ./cake-data/<model-name>/ (<model-name> is taken from
#     $CAKE_MODEL and defaults to Llama-3.2-1B-Instruct)
#   - Topology file at ./topology-docker.yml mapping layers to worker-1/worker-2
#
# Usage:
#   docker compose up --build
#
# Example topology-docker.yml for LLaMA 3.2 1B (16 layers per worker):
#
#   worker-1:
#     host: 'worker-1:10128'
#     description: 'CUDA Worker 1'
#     layers:
#       - 'model.layers.0-15'
#
#   worker-2:
#     host: 'worker-2:10128'
#     description: 'CUDA Worker 2'
#     layers:
#       - 'model.layers.16-31'
#
# Note: Docker on macOS cannot access Metal GPUs. For Apple Silicon,
# build and run natively with: cargo build --release --features metal
# =============================================================================
services:
  # Master node: started with the `master` sub-command, the mounted model
  # directory, the shared topology file, and `--api 0.0.0.0:8080`.
  master:
    build: .
    image: cake
    command:
      - master
      - --model
      - /model
      - --topology
      - /config/topology.yml
      - --api
      - "0.0.0.0:8080"
    ports:
      # Publish container port 8080 (the --api address above) on the host.
      - "8080:8080"
    volumes:
      # Model directory is selected by $CAKE_MODEL; both mounts are read-only.
      - ./cake-data/${CAKE_MODEL:-Llama-3.2-1B-Instruct}:/model:ro
      - ./topology-docker.yml:/config/topology.yml:ro
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU the Docker host exposes.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # NOTE(review): presumably lets the master retry when it exits with an
    # error because a worker is not reachable yet — confirm.
    restart: on-failure
    # Short-form depends_on only orders container start-up; it does not wait
    # for the workers to be ready to accept connections.
    depends_on:
      - worker-1
      - worker-2
    networks:
      - cake-net

  # Worker 1: the service name and the --address port are what a topology
  # `host` entry such as 'worker-1:10128' points at on cake-net.
  worker-1:
    build: .
    image: cake
    command:
      - worker
      - --model
      - /model
      - --name
      - worker-1
      - --topology
      - /config/topology.yml
      - --address
      - "0.0.0.0:10128"
    volumes:
      - ./cake-data/${CAKE_MODEL:-Llama-3.2-1B-Instruct}:/model:ro
      - ./topology-docker.yml:/config/topology.yml:ro
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU the Docker host exposes.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    networks:
      - cake-net

  # Worker 2: same configuration as worker-1 apart from the --name value.
  worker-2:
    build: .
    image: cake
    command:
      - worker
      - --model
      - /model
      - --name
      - worker-2
      - --topology
      - /config/topology.yml
      - --address
      - "0.0.0.0:10128"
    volumes:
      - ./cake-data/${CAKE_MODEL:-Llama-3.2-1B-Instruct}:/model:ro
      - ./topology-docker.yml:/config/topology.yml:ro
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU the Docker host exposes.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    networks:
      - cake-net

# User-defined bridge network shared by the master and both workers, so the
# containers can reach each other by service name.
networks:
  cake-net:
    driver: bridge